Java正则表达式上的StackOverFlow错误

时间:2015-09-03 17:42:07

标签: java regex

以下JavaScript是我试图提取的html页面的一部分。我正在使用Java来实现这一点,并且我提供了我正在使用的方法。

以下是我想从HTML页面中提取的JavaScript      

    window.arMailRuMessages = [];

    arMailRuMessages = (function() {
        var k = 1024,
            u = ajs.Html.unescape,
            m = function(data) {
                try {
                    return u(decodeURIComponent(data.text));
                } catch (e) {}
                return '';
            };

        return [

            {
                id: "14412430340000000392",
                prev: "",
                next: "14412428590000000596",
                subject: u("hi"),
                date: "1441243034",
                size: "3" | 0,
                folder: "0",
                correspondents: {
                    from: [{
                        name: u("firstname lastname"),
                        email: u("firstname@gmail.com"),
                        avatars: {
                            "default": u("\/\/filin.mail.ru\/pic?email=firstname%40gmail.com&trust=true&user=firstname%40mail.ru&sign=CA0D4E8E74E806A459EA9C793CE8BC665EB2D049")
                        }
                    }],
                    to: [{
                        name: u(""),
                        email: u("firstname6000@mail.ru"),
                        avatars: {
                            "default": u("")
                        }
                    }],
                    cc: []
                },
                flags: {
                    spf: true,
                    unread: true,
                    flagged: false,
                    reply: false,
                    forward: false,
                    attach: false
                },
                snippet: m({
                    "ntype": "letter",
                    "text": "thisisaford"
                }),
                priority: 3
            }, {
                id: "14412428590000000596",
                prev: "14412430340000000392",
                next: "",
                subject: u("hi"),
                date: "1441242859",
                size: "3" | 0,
                folder: "0",
                correspondents: {
                    from: [{
                        name: u("firstname lastname"),
                        email: u("firstname@gmail.com"),
                        avatars: {
                            "default": u("\/\/filin.mail.ru\/pic?email=firstname%40gmail.com&trust=true&user=firstname%40mail.ru&sign=CA0D4E8E74E806A459EA9C793CE8BC665EB2D049")
                        }
                    }],
                    to: [{
                        name: u(""),
                        email: u("firstname@mail.ru"),
                        avatars: {
                            "default": u("")
                        }
                    }],
                    cc: []
                },
                flags: {
                    spf: true,
                    unread: true,
                    flagged: false,
                    reply: false,
                    forward: false,
                    attach: false
                },
                snippet: m({
                    "ntype": "letter",
                    "text": "thisisatest"
                }),
                priority: 3
            }
        ];
    })();
    __log.letters_data_js = 1;
</script>

我已创建此Java方法以从HTML页面中提取脚本

public String jsMessages(String content)
{
    String result = "";

    String pattern = "window.arMailRuMessages((.|\\n)*)__log.letters_data_js";
    Pattern r = Pattern.compile(pattern);

    Matcher m = r.matcher(content);
    if (m.find())
    {
        // System.out.println("Found value: " + m.group(1));
        result = m.group(1);
    }
    else
    {
        System.out.println("NO MATCH");
    }

    return result;
}

当我运行程序时,我收到以下错误

Exception in thread "Thread-0" java.lang.StackOverflowError
    at java.util.regex.Pattern$Branch.match(Unknown Source)
    at java.util.regex.Pattern$GroupHead.match(Unknown Source)
    at java.util.regex.Pattern$Loop.match(Unknown Source)
    at java.util.regex.Pattern$GroupTail.match(Unknown Source)
    at java.util.regex.Pattern$BranchConn.match(Unknown Source)
    at java.util.regex.Pattern$CharProperty.match(Unknown Source)
    at java.util.regex.Pattern$Branch.match(Unknown Source)
    at java.util.regex.Pattern$GroupHead.match(Unknown Source)
    at java.util.regex.Pattern$Loop.match(Unknown Source)
    at java.util.regex.Pattern$GroupTail.match(Unknown Source)
    at java.util.regex.Pattern$BranchConn.match(Unknown Source)
    at java.util.regex.Pattern$CharProperty.match(Unknown Source)
    at java.util.regex.Pattern$Branch.match(Unknown Source)
    at java.util.regex.Pattern$GroupHead.match(Unknown Source)
    at java.util.regex.Pattern$Loop.match(Unknown Source)
    at java.util.regex.Pattern$GroupTail.match(Unknown Source)
    at java.util.regex.Pattern$BranchConn.match(Unknown Source)
    at java.util.regex.Pattern$CharProperty.match(Unknown Source)
    at java.util.regex.Pattern$Branch.match(Unknown Source)
    at java.util.regex.Pattern$GroupHead.match(Unknown Source)
    at java.util.regex.Pattern$Loop.match(Unknown Source)
    at java.util.regex.Pattern$GroupTail.match(Unknown Source)
    at java.util.regex.Pattern$BranchConn.match(Unknown Source)
    at java.util.regex.Pattern$CharProperty.match(Unknown Source)
    at java.util.regex.Pattern$Branch.match(Unknown Source)
    at java.util.regex.Pattern$GroupHead.match(Unknown Source)
    at java.util.regex.Pattern$Loop.match(Unknown Source)
    at java.util.regex.Pattern$GroupTail.match(Unknown Source)
    at java.util.regex.Pattern$BranchConn.match(Unknown Source)
    at java.util.regex.Pattern$CharProperty.match(Unknown Source)
    at java.util.regex.Pattern$Branch.match(Unknown Source)
    at java.util.regex.Pattern$GroupHead.match(Unknown Source)
    at java.util.regex.Pattern$Loop.match(Unknown Source)
    at java.util.regex.Pattern$GroupTail.match(Unknown Source)
    at java.util.regex.Pattern$BranchConn.match(Unknown Source)
    at java.util.regex.Pattern$CharProperty.match(Unknown Source)
    at java.util.regex.Pattern$Branch.match(Unknown Source)
    at java.util.regex.Pattern$GroupHead.match(Unknown Source)
    at java.util.regex.Pattern$Loop.match(Unknown Source)
    at java.util.regex.Pattern$GroupTail.match(Unknown Source)
    at java.util.regex.Pattern$BranchConn.match(Unknown Source)
    at java.util.regex.Pattern$CharProperty.match(Unknown Source)
    at java.util.regex.Pattern$Branch.match(Unknown Source)
    at java.util.regex.Pattern$GroupHead.match(Unknown Source)
    at java.util.regex.Pattern$Loop.match(Unknown Source)
    at java.util.regex.Pattern$GroupTail.match(Unknown Source)
    at java.util.regex.Pattern$BranchConn.match(Unknown Source)

知道我做错了什么吗?或者更好的方法吗?

1 个答案:

答案 0 :(得分:4)

请勿使用(.|\\n)*。使用.*Pattern.compile(pattern, Pattern.DOTALL)

根据您的使用情况,您可能希望将其设为“不情愿”而不是“贪婪”:.*?
也应该提升表现。