正则表达式选择不起作用

时间:2016-03-29 14:38:49

标签: javascript regex

我有一个脚本可以自动格式化提词器的脚本。它应该资本化一切(有一些例外)。但是,它也应该留下任何角度或方括号,以及括号。

以下是我创建的代码:

 s = "Prof. Dr. John Doe"
 s.scan(/Prof.|Dr.|Mr.|Mrs.|Prin.|Ms./).join("")
 => "Prof.Dr."

HTML足够简单:

<script>
String.prototype.smartUpperCase = function(){
    var pattern = /(.*?[a-z][A-Z])(.*)/g;
    if(pattern.test(this)){
        return this.replace(pattern,function(t,a,b){
            return a+b.toUpperCase();
        });
    }
    else{
        return this.toUpperCase();
    }
}
String.prototype.regexEscape = function(){ return this.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&"); }
String.prototype.removeBrackets = function(){ return this.replace(/[\<\>\[\]\(\)]/g, ""); }
String.prototype.format = function(returnValNoShow){
    text = this;
    orig = text; // for use in multi-line regex pattern
    text = text.replace(/(\w+)/g,function(t,w){ return w.smartUpperCase(); }); // smart uppercase everything
    text = text.replace(/\d{1,2}[st|nd|rd|th]{2}/gi, function(m){ return m.toLowerCase(); } ); // for dates (1st, 2nd, etc. will be lowecase)
    // complicated regex -> find anything inside <>, [], () and inject the original string back in
    var pattern = /.*(?=[^\<]*\>|[^\[]*\]|[^\(]*\)).*/g;
    text = text.replace( pattern, function(match){
        console.log(match);
        if(match==""){ return ""; }
        var pattern2 = new RegExp(".*(?="+match.regexEscape()+").*", "gi");
        //console.log(orig.match(pattern2));
        return orig.match(pattern2)[0];
    });

    text = text.replace(/\&/g, "AND"); // switch & for and

    text = text.replace(/ +/g, " "); // replace multiple spaces with one
    text = text.replace(/\n{3,}/g, "\n\n"); // replace 3+ line breaks with two
    text = text.replace(/\}\n{2,}/g, "}\n"); // don't allow empty line after name
    text = text.replace(/\n{2,}-+\n{2,}/g, "\n---\n"); // don't allow blank line between break (---)

    text = text.replace(/\n /g, "\n").replace(/ \n/g, "\n"); // trim() each line

    text = text.trim(); // trim whitespace on ends
    return text;
}
function f() {
    document.getElementById("in").value = document.getElementById("in").value.format();
}
</script>

99%的时间按预期工作。但是,如第二段所示,它有时无法做任何事情。

(textarea中的文字已经完成了格式化)

1 个答案:

答案 0 :(得分:1)

第一个问题是你的&#34;在括号中找到东西&#34;正则表达式:

var pattern = /.*(?=[^\<]*\>|[^\[]*\]|[^\(]*\)).*/g; //wrong

匹配整个字符串:模式的相关部分包含在&#34; lookahead&#34;断言为零宽度,仅用作布尔值yes / no。您需要以消费模式主动匹配这些序列(同时也不要通过删除.*来吃剩余的字符串),以便可以正确替换它们:

var pattern = /(\([^\(]*\)|\{[^\{]*\}|\[[^\[]*\])/g;

当您构建与原始文本匹配的替换模式时,会再次遇到此问题:

var pattern2 = new RegExp(".*(?="+match.regexEscape()+").*", "gi"); //wrong

这会再次展望match,但它被.*通配符序列所包围,所以如果匹配完全匹配,那么它就是<\ n em>整个字符串。将其更改为:

var pattern2 = new RegExp(match.regexEscape(), "gi")

现在当你进行替换时,它的工作方式就像你想要的那样... this demo shows your code working as intended