在JavaScript中自动释义句子

时间:2013-05-18 17:48:50

标签: javascript regex

在JavaScript中,是否可以把句子中与某个正则表达式匹配的部分,自动替换为该正则表达式随机生成的一个匹配字符串?我想用这种方法,借助一组正则表达式来自动释义句子,如下所示:

replaceWithRandomFromRegexes("You aren't a crackpot! You're a prodigy!", ["(genius|prodigy)", "(freak|loony|crackpot|crank|crazy)", "(You're |You are |Thou art )", "(aren't|ain't|are not)"])

此处,对于列表中的每个正则表达式,输入字符串中它的每个匹配项都应替换为该正则表达式随机生成的一个匹配字符串。

// Stub from the question: the body contains no code, only a comment
// describing the behaviour that is being asked for.
function replaceWithRandomFromRegexes(theString, theRegexes){
    //For each regex in theRegexes, replace the first match of the regex in the string with a randomly generated match of that regex.
}

2 个答案:

答案 0 :(得分:10)

这似乎比你想象的要简单得多。怎么样:

/**
 * Replaces every occurrence of a known word in `subject` with a randomly
 * chosen member of the synonym group that word belongs to.
 *
 * @param {string} subject - Text to paraphrase.
 * @param {string[][]} groups - Synonym groups. Each member of a group may be
 *     replaced by any member of the same group (including itself). If a word
 *     appears in several groups, the last group listed wins.
 * @param {boolean} [wordsOnly] - When truthy, the alternation is wrapped in
 *     \b...\b so that only matches delimited by word boundaries are replaced.
 * @returns {string} The paraphrased text; `subject` itself when there is
 *     nothing to match.
 */
function randomReplace(subject, groups, wordsOnly) {
    var meta = /([.?*+^$[\]\\(){}|-])/g;
    // Prototype-less dictionary: a word such as "__proto__" becomes an
    // ordinary key instead of replacing the dictionary's prototype.
    var all = Object.create(null);
    groups.forEach(function(group) {
        group.forEach(function(word) { all[word] = group; });
    });

    // An empty word may be used as a replacement, but it must not be part of
    // the pattern: it would match at every position of the subject.
    var words = Object.keys(all).filter(function(word) {
        return word.length > 0;
    });

    // Without any word the pattern would be "", which matches everywhere and
    // has no group to pick from.
    if (words.length === 0)
        return subject;

    var r = words.
        // Longest first, so "You are " is tried before a shorter word that
        // happens to be its prefix.
        sort(function(x, y) { return y.length - x.length; }).
        // Words are literals: escape regex metacharacters.
        map(function(x) { return x.replace(meta, "\\$&"); }).
        join("|");
    if (wordsOnly)
        r = "\\b(" + r + ")\\b";
    return subject.replace(new RegExp(r, "g"), function($0) {
        var group = all[$0];
        return group[Math.floor(Math.random() * group.length)];
    });
}

示例:

// Example: paraphrase a sentence using four synonym groups.
// `s` is declared explicitly so the snippet also runs in strict/module code.
var s = randomReplace(
    "You aren't a crackpot! You're a prodigy!",
    [
        ["genius", "prodigy"],
        ["freak", "loony", "crackpot", "crank", "crazy"],
        ["You're ", "You are ", "Thou art "],
        ["aren't", "ain't", "are not"]
    ]
);
console.log(s); // one possible (random) output: You ain't a crank! Thou art a genius!

评论中讨论的 expand(展开)函数可以这样实现:

/**
 * Expands a pattern made of literal text, "|" alternation and parenthesised
 * groups into the list of every string the pattern can produce.
 *
 * Example: expand("(B|R)ob(by|)") -> ["Bobby", "Bob", "Robby", "Rob"].
 *
 * @param {string} s - Pattern to expand.
 * @returns {string[]} All expansions, with earlier alternatives first.
 */
function expand(s) {
    // Token table: a "~N" token in the working string refers to table[N].
    // Each entry records whether it is literal text or the (tokenised)
    // content of a group, so literal text that itself looks like "~3" is
    // never mistaken for a token.
    var table = [];

    // Every concatenation x + y with x from a and y from b, a-major order.
    function product(a, b) {
        var p = [];
        a.forEach(function(x) {
            b.forEach(function(y) { p.push(x + y); });
        });
        return p;
    }

    // Expansions of one table entry.
    function resolve(entry) {
        return entry.literal ? [entry.text] : reduce(entry.text);
    }

    // Expansions of a tokenised string (tokens and "|" only).
    function reduce(t) {
        if (t.indexOf("|") >= 0)
            return [].concat.apply([], t.split("|").map(function(part) {
                return reduce(part);
            }));
        var m = t.match(/~(\d+)(.*)/);
        if (m)
            return product(resolve(table[m[1]]), reduce(m[2]));
        return [t];
    }

    function addToken(text, literal) {
        table.push({ text: text, literal: literal });
        return "~" + (table.length - 1);
    }

    // Pass 1: replace every run of literal text with a token.
    var work = s.replace(/[^()|]+/g, function(text) {
        return addToken(text, true);
    });

    // Pass 2: collapse innermost groups into tokens until none are left.
    var group = /\(([^()]*)\)/g;
    var previous;
    do {
        previous = work;
        work = work.replace(group, function($0, inner) {
            return addToken(inner, false);
        });
    } while (work !== previous);

    return reduce(work);
}

示例:

// Example: list every sentence the pattern can produce.
// `z` is declared explicitly so the snippet also runs in strict/module code.
var z = "(He|She|It|(B|R)ob(by|)) (real|tru|sure)ly is";
console.log(expand(z));

结果:

[
 "He really is",
 "He truly is",
 "He surely is",
 "She really is",
 "She truly is",
 "She surely is",
 "It really is",
 "It truly is",
 "It surely is",
 "Bobby really is",
 "Bobby truly is",
 "Bobby surely is",
 "Bob really is",
 "Bob truly is",
 "Bob surely is",
 "Robby really is",
 "Robby truly is",
 "Robby surely is",
 "Rob really is",
 "Rob truly is",
 "Rob surely is"
]

答案 1 :(得分:5)

是的,这肯定是可能的。我创建了一个名为replaceWithRandomFromRegexes的函数来完成这项任务。

http://jsfiddle.net/KZyZW/2/

// Demo: render ten random paraphrases, one per line.
// The markup is collected first and appended once, so the page body is
// serialised and re-parsed a single time instead of on every iteration.
var paraphrases = [];
for (var i = 0; i < 10; i++) {
    paraphrases.push(replaceWithRandomFromRegexes("You aren't a crackpot! You're a prodigy!", ["(genius|prodigy)", "(freak|loony|crackpot|crank|crazy)", "(You're |You are |Thou art )", "(aren't|ain't|are not)"]) + "<br/>");
}
document.body.innerHTML += paraphrases.join("");

/**
 * Runs one replacement pass per regex source, in list order; each pass
 * works on the output of the previous one.
 *
 * @param {string} theString - Sentence to paraphrase.
 * @param {string[]} theRegexes - Regex sources such as "(genius|prodigy)".
 * @returns {string} The sentence after all passes.
 */
function replaceWithRandomFromRegexes(theString, theRegexes) {
    var result = theString;
    var position = 0;
    while (position < theRegexes.length) {
        result = globalReplaceWithRandomFromRegex(result, theRegexes[position]);
        position += 1;
    }
    return result;
}

/**
 * Replaces every case-insensitive match of the regex in `theString` with a
 * freshly generated random string derived from the same regex source.
 *
 * @param {string} theString - Text to process.
 * @param {string} theRegexString - Regex source, e.g. "(aren't|ain't|are not)".
 * @returns {string} The text with each match replaced independently.
 */
function globalReplaceWithRandomFromRegex(theString, theRegexString) {
    var theRegex = new RegExp(theRegexString, "gi");
    // A replacer function is used instead of a temporary placeholder string:
    // text that already contains the placeholder is left alone, "$" sequences
    // in the generated string are inserted literally, and a generated string
    // can never be re-scanned and replaced again.
    return theString.replace(theRegex, function () {
        return getRandomStringFromNestedParentheses(theRegexString);
    });
}

/**
 * Keeps resolving the first parenthesised group until the string contains
 * no "(" any more, then returns the result.
 *
 * @param {string} theString - Pattern with (possibly nested) groups.
 * @returns {string} The pattern with every group resolved.
 */
function getRandomStringFromNestedParentheses(theString) {
    var remaining = theString;
    for (;;) {
        if (remaining.indexOf("(") === -1) {
            return remaining;
        }
        remaining = replaceInFirstParentheses(remaining);
    }
}

/**
 * Replaces the first parenthesised group of `theString` (from the first "("
 * to its matching ")") with one randomly chosen alternative of that group.
 *
 * @param {string} theString - Pattern that may contain groups.
 * @returns {string} The pattern with its first group resolved, or the input
 *     unchanged when it contains no "(".
 * @throws {Error} When the first "(" has no matching ")".
 */
function replaceInFirstParentheses(theString) {
    var parenthesesIndex = theString.indexOf("(");
    if (parenthesesIndex === -1) {
        // Nothing to resolve.
        return theString;
    }

    var randomString = getRandomStringInsideParentheses(theString, parenthesesIndex);
    var closingIndex = getCorrespondingParenthesesIndex(theString, parenthesesIndex);
    if (closingIndex === false) {
        // Carrying on would leave the "(" in place, so a caller that loops
        // until no "(" remains would never terminate.
        throw new Error("Cannot resolve group: no matching closing parenthesis");
    }

    // Splice by position rather than String.prototype.replace, so "$&", "$1"
    // etc. inside the chosen alternative are inserted literally.
    return theString.substring(0, parenthesesIndex) +
        randomString +
        theString.substring(closingIndex + 1);
}

/**
 * Picks one alternative, uniformly at random, from the group that opens at
 * `parenthesesIndex`.
 *
 * @param {string} string - Pattern containing the group.
 * @param {number} parenthesesIndex - Index of the group's opening "(".
 * @returns {string} One of the group's alternatives.
 */
function getRandomStringInsideParentheses(string, parenthesesIndex) {
    var candidates = getStringsInsideParentheses(string, parenthesesIndex);
    var pick = Math.floor(Math.random() * candidates.length);
    return candidates[pick];
}

/**
 * Splits the content of the group that opens at `parenthesesIndex` into its
 * alternatives. Only "|" characters at the group's own nesting level act as
 * separators; pipes inside nested groups stay part of the alternative.
 *
 * Example: for "(a|(b|c)|)" at index 0 the result is ["a", "(b|c)", ""].
 *
 * @param {string} string - Pattern containing the group.
 * @param {number} parenthesesIndex - Index of the group's opening "(".
 * @returns {string[]} The alternatives, in order (possibly empty strings).
 */
function getStringsInsideParentheses(string, parenthesesIndex) {
    var inner = getStringFromParentheses(string, parenthesesIndex);
    var alternatives = [];
    var depth = 0;   // nesting level relative to the group's content
    var start = 0;   // start of the alternative currently being read

    // Single pass with a running depth. Cutting at the separators directly
    // (instead of doubling them and splitting on "||") keeps an empty
    // alternative inside a nested group, e.g. "(b||c)", intact.
    for (var i = 0; i < inner.length; i++) {
        var ch = inner.charAt(i);
        if (ch === "(") {
            depth++;
        } else if (ch === ")") {
            depth--;
        } else if (ch === "|" && depth === 0) {
            alternatives.push(inner.substring(start, i));
            start = i + 1;
        }
    }
    alternatives.push(inner.substring(start));
    return alternatives;
}

/**
 * Returns the text strictly between the "(" at `parenthesesIndex` and the
 * index reported by getCorrespondingParenthesesIndex for it.
 *
 * @param {string} theString - Pattern containing the group.
 * @param {number} parenthesesIndex - Index of the group's opening "(".
 * @returns {string} The group's content without the outer parentheses.
 */
function getStringFromParentheses(theString, parenthesesIndex) {
    var contentStart = parenthesesIndex + 1;
    var contentEnd = getCorrespondingParenthesesIndex(theString, parenthesesIndex);
    return theString.substring(contentStart, contentEnd);
}

/**
 * Finds the index of the ")" that closes the "(" located at
 * `openingParenthesesIndex`.
 *
 * @param {string} theString - Text to scan.
 * @param {number} openingParenthesesIndex - Index of an opening "(".
 * @returns {number|false} Index of the closing ")", or false (after
 *     reporting through writeMessage) when the parentheses do not match or
 *     the given index does not hold "(".
 */
function getCorrespondingParenthesesIndex(theString, openingParenthesesIndex) {
    var balanced = parenthesesAreMatching(theString);
    if (!balanced) {
        writeMessage("Error: The parentheses do not match!");
        return false;
    }

    var opensGroup = theString.charAt(openingParenthesesIndex) === "(";
    if (!opensGroup) {
        writeMessage("Error: The index must be an opening parentheses!");
        return false;
    }

    // Walk forward from the opening parenthesis; the group ends where the
    // nesting depth drops back to zero.
    var depth = 0;
    var position = openingParenthesesIndex;
    while (position < theString.length) {
        switch (theString.charAt(position)) {
            case "(":
                depth += 1;
                break;
            case ")":
                depth -= 1;
                break;
        }
        if (depth === 0) {
            return position;
        }
        position += 1;
    }

    writeMessage("Error: The parentheses do not match!");
    return false;
}

/**
 * Tells whether the parentheses in `theString` are balanced: every ")"
 * closes an earlier "(" and no "(" is left open.
 *
 * Always returns a boolean. Callers that only test the result for
 * truthiness keep working; a string without any characters (or without any
 * parentheses) is balanced and now yields true instead of a falsy 0.
 *
 * @param {string} theString - Text to check.
 * @returns {boolean} true when balanced, false otherwise.
 */
function parenthesesAreMatching(theString) {
    var depth = 0;
    for (var i = 0; i < theString.length; i++) {
        var ch = theString.charAt(i);
        if (ch === "(") {
            depth++;
        } else if (ch === ")") {
            depth--;
            // A ")" with nothing open can never be matched later, e.g. ")(".
            if (depth < 0) {
                return false;
            }
        }
    }
    return depth === 0;
}

/**
 * Computes the parenthesis nesting depth just before position `index`:
 * the number of "(" minus the number of ")" among the characters at
 * positions 0 .. index - 1.
 *
 * @param {string} theString - Text to scan.
 * @param {number} index - Exclusive end position of the scan.
 * @returns {number} The depth (negative when more ")" than "(" were seen).
 */
function getParenthesesNum(theString, index) {
    var depth = 0;
    var position = 0;
    while (position < index) {
        switch (theString.charAt(position)) {
            case "(":
                depth += 1;
                break;
            case ")":
                depth -= 1;
                break;
        }
        position += 1;
    }
    return depth;
}

原始句子是 "You aren't a crackpot! You're a prodigy!"。以下是由此脚本生成的该句子的一些自动释义版本:

You are not a freak! Thou art a genius!
You aren't a crackpot! You're a prodigy!
You ain't a crackpot! You are a genius!
You ain't a freak! You're a prodigy!
You are not a crackpot! You are a prodigy!
You are not a loony! You're a prodigy!
You are not a loony! You are a genius!
You are not a loony! You are a prodigy!
You ain't a crackpot! Thou art a prodigy!
You are not a loony! Thou art a prodigy!