如何识别以下代码模式

时间:2016-01-20 11:19:31

标签: javascript regex node.js promise esprima

我有一种 JS Promise 的代码模式,想通过几个关键字来识别它

例如,如果我输入如下代码:

var deferred = Q.defer();

在文件中我还有以下各自的值

deferred.reject(err);
deferred.resolve();
return deferred.promise;

完整的代码

  

示例1

/**
 * Writes errMessage to "errors.log" and reports the outcome through a
 * deferred created with Q.defer().
 *
 * Q and fs are not defined in this snippet; they come from the surrounding file.
 *
 * @param errMessage - data handed to fs.writeFile as the file contents
 * @returns deferred.promise; the deferred is rejected with the writeFile
 *          error, or resolved with no value when the callback gets no error
 */
function writeError(errMessage) {
    var deferred = Q.defer();
    fs.writeFile("errors.log", errMessage, function (err) {
        if (err) {
            // the write failed: pass the error on to the promise's consumer
            deferred.reject(err);
        } else {
            // the write succeeded: settle the promise without a value
            deferred.resolve();
        }
    });
    // returned before the writeFile callback has run
    return deferred.promise;
}

如果我把一个大型代码文件(作为字符串)作为输入,我希望能够判断出该文件是否包含这种模式

另一个例子

var d = Q.defer(); /* or $q.defer */

在文件中,您还有以下各自的值

d.resolve(val);
d.reject(err); 
return d.promise;
  

完整示例2

/**
 * Sample of the deferred pattern being searched for: creates a deferred `d`,
 * calls myPromiseFn(param+1), forwards its fulfilment value to d.resolve and
 * its error to d.reject, and returns d.promise.
 *
 * Q, Promise and myPromiseFn are not defined in this snippet.
 *
 * NOTE(review): the callback opened by `Promise(function(resolve, reject) {`
 * has no matching `});` in this snippet, so the braces do not balance as
 * written; the final `}` closes that callback rather than getStuffDone.
 *
 * @param param - value that is incremented by 1 and passed to myPromiseFn
 */
function getStuffDone(param) {           
    var d = Q.defer(); /* or $q.defer */ 

    Promise(function(resolve, reject) {
        // or = new $.Deferred() etc.        
        myPromiseFn(param+1)                 
        .then(function(val) { /* or .done */ 
            d.resolve(val);                  
        }).catch(function(err) { /* .fail */ 
            d.reject(err);                   
        });                                  
        return d.promise; /* or promise() */ 

}                  

有没有开源工具可以用来做这类分析(给定一个模式,它就能找出匹配之处……)?

还有一些涉及 childProcess 的更复杂的模式,但目前先解决这个就可以了 :)

2 个答案:

答案 0 :(得分:7)

以下正则表达式可能看起来有点可怕,但是它是根据简单的概念构建的,并且允许比您提到的更多余地 - 例如额外的空格,不同的变量名称,遗漏var等似乎适用于这两个示例 - 请查看它是否符合您的需求。

([^\s\r\n]+)\s*=\s*(?:Q|\$q)\.defer\s*\(\s*\)\s*;(?:\r?\n|.)*(?:\s|\r?\n)(?:\1\.reject\(\w+\)\s*;(?:\r?\n|.)*(?:\s|\r?\n)\1\.resolve\(\s*\w*\)\s*;|\1\.resolve\(\s*\w*\)\s*;(?:\r?\n|.)*(?:\s|\r?\n)\1\.reject\(\w+\)\s*;)(?:\r?\n|.)*(?:\s|\r?\n)return\s+(?:\1\.)?promise\s*;

Regular expression visualization

Debuggex Demo

答案 1 :(得分:3)

更新:我对代码进行了一次更正,即将set[2]更改为set[set.length - 1]以适应任何大小的查询集。然后,我将完全相同的算法应用于您的两个示例。

我提供的解决方案遵循一些我认为对您提议的搜索类型合理的规则。假设您正在寻找四行ABCD(不区分大小写,因此它会找到ABCD或abcd或aBcD):

  • 可以在单个文件中找到多个匹配集,即它将在ABCDabcd中找到两个集合。
  • 正则表达式针对单独的每一行进行匹配,这意味着可以容纳各种变体。(这带来的一个好处是:即使代码中匹配行的末尾带有注释,也不影响匹配。)
  • 所寻求的模式必须始终位于不同的行上,例如A和B不能在同一行上。
  • 匹配的集合必须完整,例如它不会找到ABC或ABD。
  • 匹配的集必须不间断,即在ABCaD中找不到任何内容。 (重要的是,这也意味着在重叠集合中找不到任何东西,例如ABCaDbcd。你可能会认为这太限制了。但是,在这个例子中,应该找到哪个,ABCD或abcd?答案是任意的并且,任意性很难编码。此外,根据您展示的示例,通常不会出现这种重叠,因此这种边缘情况似乎不太可能,这使得此限制合理。)
  • 匹配的集合内部不得重复,例如它不会找到ABbCD。但是,对于AaBCD,可以找到一个集合,即它会找到aBCD。
  • 允许集合嵌套,但只会找到内层的集合,例如对于ABabcdCD,只会找到abcd。

下面的代码段显示了一个示例搜索。它没有展示所有边缘情况。但是,它确实显示了整体功能。

// Regex sources for the lines being sought; they must be hit in this order.
var queryRegexStrs = [
  "I( really)? (like|adore) strawberry",
  "I( really)? (like|adore) chocolate",
  "I( really)? (like|adore) vanilla"
];

// Sample text to search, one array entry per line ("...." is filler).
// Every line, including the last one, is terminated by "\n".
var codeStr = [
  "....",
  "Most people would say 'I like vanilla'",
  "....",
  "....",
  "....",
  "....",
  "Amir's taste profile:",
  "....",
  "I like strawberry",
  "....",
  "....",
  "I told Billy that I really adore chocolate a lot",
  "....",
  "I like vanilla most of the time",
  "....",
  "Let me emphasize that I like strawberry",
  "....",
  "....",
  "....",
  "....",
  "Juanita's taste profile:",
  "....",
  "I really adore strawberry",
  "I like vanilla",
  "....",
  "....",
  "....",
  "....",
  "Rachel's taste profile:",
  "I adore strawberry",
  "....",
  "Sometimes I like chocolate, I guess",
  "....",
  "I adore vanilla",
  "....",
  "....",
  "....",
  "...."
].join("\n") + "\n";

// separator handed to search() for splitting codeStr into lines
var endOfLineStr = "\n";

var matchSets = search(queryRegexStrs, codeStr, endOfLineStr);





/**
 * Scan a block of text for ordered runs of lines that match a list of regexes.
 *
 * The text is split on endOfLineStr and every line is tested against every
 * query regex, in query order. A run is recorded only when the queries are
 * hit strictly in sequence: a hit on the first query always starts a fresh
 * run, and a hit on any query other than the one expected next discards the
 * run in progress.
 *
 * @param {string[]} queryRegexStrs - regex source strings, one per sought line
 * @param {string} codeStr - the text to search
 * @param {string} endOfLineStr - separator used to split codeStr into lines
 * @returns {number[][]} one array of 0-based line numbers per complete run
 */
function search(queryRegexStrs, codeStr, endOfLineStr) {
  const lines = codeStr.split(endOfLineStr);
  const wanted = queryRegexStrs.length;
  const patterns = queryRegexStrs.map((source) => new RegExp(source));

  // runs that matched every query, in the order they were completed
  const completeRuns = [];

  // line numbers collected for the run currently being assembled
  let run = [];

  // index of the query that has to be hit next for the run to continue
  let expected = 0;

  for (let lineNo = 0; lineNo < lines.length; lineNo += 1) {
    const text = lines[lineNo];

    for (let q = 0; q < patterns.length; q += 1) {
      if (!patterns[q].test(text)) {
        continue;
      }

      if (q === 0) {
        // the first query always restarts the run on this line
        run = [lineNo];
        expected = 1;
      } else if (q === expected) {
        run.push(lineNo);
        expected += 1;
      } else {
        // out-of-sequence hit: throw the partial run away
        run = [];
        expected = 0;
        continue;
      }

      // the last query was just hit: keep the run and start looking afresh
      if (expected === wanted) {
        completeRuns.push(run);
        run = [];
        expected = 0;
      }
    }
  }

  return completeRuns;
}




// Write the query, the searched text and the hits into the page, in order.
[
  "<b>The code lines being sought:</b>",
  "<pre>" + JSON.stringify(queryRegexStrs, null, 2) + "</pre>",
  "<b>The code being searched:</b>",
  // one <li> per line of the searched text, numbered from 0
  "<pre><ol start='0'><li>" + codeStr.split("\n").join("</li><li>") + "</li></ol></pre>",
  "<b>The code line numbers of query 'hits', grouped by query set:</b>",
  "<pre>" + JSON.stringify(matchSets) + "</pre>",
  "<b>One possible formatted output:</b>"
].forEach(function(html) {
  document.write(html);
});

// One summary line per match set: its first and last matching line number.
var str =
  "<p>(Note that line numbers are 0-based...easily changed to 1-based if desired)</p>" +
  "<pre>" +
  matchSets.map(function(set, setNum) {
    return "Matching code block #" + (setNum + 1) + ": lines " + set[0] + "-" + set[set.length - 1] + "<br />";
  }).join("") +
  "</pre>";
document.write(str);

这是完全相同的算法,只是使用了你原来的示例1和2。请注意以下几点。首先,任何需要在正则表达式字符串中转义的内容实际上都需要双重转义,例如,为了匹配一个字面的左括号,你需要写“\\(”而不仅仅是“\(”。另外,正则表达式可能看起来有点复杂,对此我有两点说明。第一:其中很大一部分只是在匹配字面的句点和括号。第二,也是更重要的一点:能够使用复杂的正则表达式,正是整个方法的威力(即“灵活性”)所在。例如,你提供的示例需要用到一些“或”分支,例如“a|b”表示“匹配 a 或 b”。

// Regex sources for the four statements of the deferred pattern, in the
// order they must be hit. Backslashes are doubled because these are string
// literals that are later compiled with new RegExp().
var queryRegexStrs = [
  "var deferred = Q\\.defer\\(\\);",
  "deferred\\.reject\\(err\\);",
  "deferred\\.resolve\\(\\);",
  "return deferred\\.promise;"
];

// Source text to search, one array entry per line; every line ends in "\n".
var codeStr = [
  'function writeError(errMessage) {',
  '    var deferred = Q.defer();',
  '    fs.writeFile("errors.log", errMessage, function (err) {',
  '        if (err) {',
  '            deferred.reject(err);',
  '        } else {',
  '            deferred.resolve();',
  '        }',
  '    });',
  '    return deferred.promise;',
  '}'
].join("\n") + "\n";

// separator handed to search() for splitting codeStr into lines
var endOfLineStr = "\n";

var matchSets = search(queryRegexStrs, codeStr, endOfLineStr);





/**
 * Find every place where the given query regexes are matched, one after the
 * other and in order, by lines of codeStr.
 *
 * Each line (codeStr split on endOfLineStr) is tested against each query in
 * query order. Hitting the first query begins a new candidate set; hitting
 * the query expected next extends it; hitting any other query abandons it.
 * A candidate that reaches the last query is stored in the result.
 *
 * @param {string[]} queryRegexStrs - regex source strings, one per sought line
 * @param {string} codeStr - the text to search
 * @param {string} endOfLineStr - separator used to split codeStr into lines
 * @returns {number[][]} 0-based line numbers of each complete set found
 */
function search(queryRegexStrs, codeStr, endOfLineStr) {
  const codeLines = codeStr.split(endOfLineStr);
  const setSize = queryRegexStrs.length;
  const matchers = queryRegexStrs.map(function(source) {
    return new RegExp(source);
  });

  const completed = [];
  let partial = [];
  let nextWanted = 0;

  // drop the candidate set and go back to waiting for the first query
  function startOver() {
    partial = [];
    nextWanted = 0;
  }

  for (const [lineNum, text] of codeLines.entries()) {
    for (const [queryNum, matcher] of matchers.entries()) {
      if (!matcher.test(text)) {
        continue;
      }

      // a hit on the first query discards whatever was collected so far
      if (queryNum === 0) {
        startOver();
      }

      if (queryNum === nextWanted) {
        partial.push(lineNum);
        nextWanted += 1;

        // every query has been hit in order: keep this set
        if (nextWanted === setSize) {
          completed.push(partial);
          startOver();
        }
      } else {
        // hit on a query that was not the one expected next
        startOver();
      }
    }
  }

  return completed;
}




// Show what was searched for, what was searched, and what was found.
document.write("<b>The code lines being sought:</b>");
document.write(`<pre>${JSON.stringify(queryRegexStrs, null, 2)}</pre>`);
document.write("<b>The code being searched:</b>");
// one <li> per line of the searched text, numbered from 0
document.write(`<pre><ol start='0'><li>${codeStr.split("\n").join("</li><li>")}</li></ol></pre>`);
document.write("<b>The code line numbers of query 'hits', grouped by query set:</b>");
document.write(`<pre>${JSON.stringify(matchSets)}</pre>`);
document.write("<b>One possible formatted output:</b>");

// One summary line per match set: its first and last matching line number.
var str = "<p>(Note that line numbers are 0-based...easily changed to 1-based if desired)</p>";
str += "<pre>";
for (const [setNum, set] of matchSets.entries()) {
  str += `Matching code block #${setNum + 1}: lines ${set[0]}-${set[set.length - 1]}<br />`;
}
str += "</pre>";
document.write(str);

这是完全相同的算法,只使用原始示例2:

// Regex sources for the deferred pattern of example 2, in the order they
// must be hit. The alternations accept both spellings shown in the sample:
// Q.defer() / $q.defer and d.promise / d.promise().
var queryRegexStrs = [
  "var d = (Q\\.defer\\(\\)|\\$q\\.defer);",
  "d\\.resolve\\(val\\);",
  "d\\.reject\\(err\\);",
  "return d\\.promise(\\(\\))?;"
];

// Source text to search, one array entry per line ("...." is filler).
// It holds two copies of the sample function, one per spelling; every line
// ends in "\n".
var codeStr = [
  "....",
  "....",
  "....",
  "function getStuffDone(param) {",
  "    var d = Q.defer();",
  "",
  "    Promise(function(resolve, reject) {",
  "        // or = new $.Deferred() etc.",
  "        myPromiseFn(param+1)",
  "        .then(function(val) { /* or .done */",
  "            d.resolve(val);",
  "        }).catch(function(err) { /* .fail */",
  "            d.reject(err);",
  "        });",
  "        return d.promise;",
  "",
  "}",
  "....",
  "....",
  "....",
  "function getStuffDone(param) {",
  "    var d = $q.defer;",
  "",
  "    Promise(function(resolve, reject) {",
  "        // or = new $.Deferred() etc.",
  "        myPromiseFn(param+1)",
  "        .then(function(val) { /* or .done */",
  "            d.resolve(val);",
  "        }).catch(function(err) { /* .fail */",
  "            d.reject(err);",
  "        });",
  "        return d.promise();",
  "",
  "}",
  "....",
  "....",
  "...."
].join("\n") + "\n";

// separator handed to search() for splitting codeStr into lines
var endOfLineStr = "\n";

var matchSets = search(queryRegexStrs, codeStr, endOfLineStr);





/**
 * Locate complete, in-order sequences of query matches within codeStr.
 *
 * codeStr is split on endOfLineStr; each line is tested against each query
 * regex in query order, and every hit is fed to a small tracker. The tracker
 * restarts on a hit of the first query, advances on a hit of the query it is
 * waiting for, and clears itself on any other hit. A sequence that reaches
 * the last query is added to the result.
 *
 * @param {string[]} queryRegexStrs - regex source strings, one per sought line
 * @param {string} codeStr - the text to search
 * @param {string} endOfLineStr - separator used to split codeStr into lines
 * @returns {number[][]} 0-based line numbers of each complete sequence
 */
function search(queryRegexStrs, codeStr, endOfLineStr) {
  const codeLines = codeStr.split(endOfLineStr);
  const numQueries = queryRegexStrs.length;
  const regexes = queryRegexStrs.map((source) => new RegExp(source));

  // sets: finished sequences; current: sequence in progress;
  // next: index of the query that must be hit next
  const state = { sets: [], current: [], next: 0 };

  const clear = () => {
    state.current = [];
    state.next = 0;
  };

  // update the tracker for a hit of query `queryNum` on line `lineNum`
  const onHit = (queryNum, lineNum) => {
    if (queryNum === 0) {
      // the first query always begins a new sequence
      clear();
    }

    if (queryNum !== state.next) {
      // not the query being waited for: forget the sequence in progress
      clear();
      return;
    }

    state.current.push(lineNum);
    state.next += 1;

    if (state.next === numQueries) {
      // the whole sequence was found: store it and begin again
      state.sets.push(state.current);
      clear();
    }
  };

  codeLines.forEach((line, lineNum) => {
    regexes.forEach((regex, queryNum) => {
      if (regex.test(line)) {
        onHit(queryNum, lineNum);
      }
    });
  });

  return state.sets;
}




// Emit the report: the queries, the numbered source text, then the hits.
var writeHtml = function(html) {
  document.write(html);
};

writeHtml("<b>The code lines being sought:</b>");
writeHtml("<pre>" + JSON.stringify(queryRegexStrs, null, 2) + "</pre>");
writeHtml("<b>The code being searched:</b>");
// one <li> per line of the searched text, numbered from 0
writeHtml(
  ["<pre><ol start='0'><li>", codeStr.replace(/\n/g, "</li><li>"), "</li></ol></pre>"].join("")
);
writeHtml("<b>The code line numbers of query 'hits', grouped by query set:</b>");
writeHtml("<pre>" + JSON.stringify(matchSets) + "</pre>");
writeHtml("<b>One possible formatted output:</b>");

// One summary line per match set: its first and last matching line number.
var str = matchSets.reduce(
  function(html, set, setNum) {
    return html + "Matching code block #" + (setNum + 1) + ": lines " + set[0] + "-" + set[set.length - 1] + "<br />";
  },
  "<p>(Note that line numbers are 0-based...easily changed to 1-based if desired)</p>" + "<pre>"
) + "</pre>";
writeHtml(str);