查找包含较大字符串中给定字母集的最小子字符串

时间:2015-12-20 12:36:12

标签: javascript regex string substring

假设您有以下字符串:

FJKAUNOJDCUTCRHBYDLXKEODVBWTYPTSHASQQFCPRMLDXIJMYPVOHBDUGSMBLMVUMMZYHULSUIZIMZTICQORLNTOVKVAMQTKHVRIFMNTSLYGHEHFAHWWATLYAPEXTHEPKJUGDVWUDDPRQLUZMSZOJPSIKAIHLTONYXAULECXXKWFQOIKELWOHRVRUCXIAASKHMWTMAJEWGEESLWRTQKVHRRCDYXNT
LDSUPXMQTQDFAQAPYBGXPOLOCLFQNGNKPKOBHZWHRXAWAWJKMTJSLDLNHMUGVVOPSAMRUJEYUOBPFNEHPZZCLPNZKWMTCXERPZRFKSXVEZTYCXFRHRGEITWHRRYPWSVAYBUHCERJXDCYAVICPTNBGIODLYLMEYLISEYNXNMCDPJJRCTLYNFMJZQNCLAGHUDVLYIGASGXSZYPZKLAWQUDVNTWGFFY
FFSMQWUNUPZRJMTHACFELGHDZEJWFDWVPYOZEVEJKQWHQAHOCIYWGVLPSHFESCGEUCJGYLGDWPIWIDWZZXRUFXERABQJOXZALQOCSAYBRHXQQGUDADYSORTYZQPWGMBLNAQOFODSNXSZFURUNPMZGHTAJUJROIGMRKIZHSFUSKIZJJTLGOEEPBMIXISDHOAIFNFEKKSLEXSJLSGLCYYFEQBKIZZTQQ
XBQZAPXAAIFQEIXELQEZGFEPCKFPGXULLAHXTSRXDEMKFKABUTAABSLNQBNMXNEPODPGAORYJXCHCGKECLJVRBPRLHORREEIZOBSHDSCETTTNFTSMQPQIJBLKNZDMXOTRBNMTKHHCZQQMSLOAXJQKRHDGZVGITHYGVDXRTVBJEAHYBYRYKJAVXPOKHFFMEPHAGFOOPFNKQAUGYLVPWUJUPCUGGIXGR
AMELUTEPYILBIUOCKKUUBJROQFTXMZRLXBAMHSDTEKRRIKZUFNLGTQAEUINMBPYTWXULQNIIRXHHGQDPENXAJNWXULFBNKBRINUMTRBFWBYVNKNKDFR

我正在尝试找到包含字母ABCDA的最小子字符串。

我尝试了正则表达式方法。

// Collects the non-overlapping lazy matches of "A ... B ... C ... D ... A" (letters in
// exactly this order), sorts them by length and logs the shortest one.
// `.` does not match line terminators here, so a single match never spans two lines.
// NOTE(review): `str` is not defined in this snippet - presumably the string shown above.
// String.prototype.match returns null when nothing matches, in which case .sort would throw.
console.log(str.match(/[A].*?[B].*?[C].*?[D].*?[A]/gm).sort((a, b) => a.length - b.length)[0]);

这样可行,但它只能找到ABCDA出现的字符串(按此顺序)。这意味着它不会找到字母以这样的顺序出现的子字符串:BCDAA

我正在尝试更改我的正则表达式来处理这种情况。如果不使用|逐一列出所有不同的排列,我该怎么做?

4 个答案:

答案 0 :(得分:3)

你不能。

让我们考虑一个特例:假设您要找的字母是A、A和B。在正则表达式的某个位置肯定会有一个B。但是,B左侧和右侧的部分彼此独立,因此您无法从一侧引用另一侧。B右侧的子表达式中需要匹配多少个A,取决于左侧部分已经匹配的A的数量。这对于正则表达式是不可能的,因此您必须展开所有不同的排列顺序,而排列的数量可能很多!

说明这个问题的另一个常见示例是匹配左括号与右括号。不可能编写一个正则表达式来断言:在给定的字符串中,一系列左括号后面跟着相同数量的右括号。原因在于,对括号计数需要栈机器(下推自动机),而不是有限状态机,但正则表达式只能匹配有限状态机能够识别的模式。

答案 1 :(得分:1)

也许没有使用正则表达式那么明确(好吧,对我来说,正则表达式从来都不是很清楚:D)你可以使用蛮力(不那么粗暴)

创建一个字符串“有效”点的索引(那些带有你想要的字母的点),并用一个双循环遍历它,得到包含至少5个这些点的子串,检查它们是否是有效的解决方案。也许不是最有效的方式,但易于实施,理解,并可能优化。

// Schema definition with a `title` and a `url` field.
// NOTE(review): `Schema` and `range` are not defined in this snippet, and the `...`
// line is a placeholder for omitted fields - the fragment is not runnable as shown.
var PostSchema = new Schema({
    title: String,
    // NOTE(review): the arrow function below is invoked immediately (trailing `()`), so
    // `default` receives the single string produced when this object literal is
    // evaluated rather than a function - confirm this is intended together with `unique: true`.
    url: {type: String, unique: true, default: (() => {
        // `possible` is the alphabet the random characters are drawn from.
        let gen = "", possible = "QWERTYUIOPASDFGHJKLZXCVBNMqwertyuiopasdfghjklzxcvbnm0123456789";
        // Appends one random character from `possible` per element of range(100);
        // presumably this yields 100 characters - confirm the semantics of `range`.
        range(100).forEach((value, index) => gen += possible.charAt(Math.floor(Math.random() * possible.length)));
        return gen;
    })()},
    ...
});

答案 2 :(得分:1)

此算法不使用正则表达式,但也找到了两种解决方案。

const haystack = 'FJKAUNOJDCUTCRHBYDLXKEODVBWTYPTSHASQQFCPRMLDXIJMYPVOHBDUGSMBLMVUMMZYHULSUIZIMZTICQORLNTOVKVAMQTKHVRIFMNTSLYGHEHFAHWWATLYAPEXTHEPKJUGDVWUDDPRQLUZMSZOJPSIKAIHLTONYXAULECXXKWFQOIKELWOHRVRUCXIAASKHMWTMAJEWGEESLWRTQKVHRRCDYXNTLDSUPXMQTQDFAQAPYBGXPOLOCLFQNGNKPKOBHZWHRXAWAWJKMTJSLDLNHMUGVVOPSAMRUJEYUOBPFNEHPZZCLPNZKWMTCXERPZRFKSXVEZTYCXFRHRGEITWHRRYPWSVAYBUHCERJXDCYAVICPTNBGIODLYLMEYLISEYNXNMCDPJJRCTLYNFMJZQNCLAGHUDVLYIGASGXSZYPZKLAWQUDVNTWGFFYFFSMQWUNUPZRJMTHACFELGHDZEJWFDWVPYOZEVEJKQWHQAHOCIYWGVLPSHFESCGEUCJGYLGDWPIWIDWZZXRUFXERABQJOXZALQOCSAYBRHXQQGUDADYSORTYZQPWGMBLNAQOFODSNXSZFURUNPMZGHTAJUJROIGMRKIZHSFUSKIZJJTLGOEEPBMIXISDHOAIFNFEKKSLEXSJLSGLCYYFEQBKIZZTQQXBQZAPXAAIFQEIXELQEZGFEPCKFPGXULLAHXTSRXDEMKFKABUTAABSLNQBNMXNEPODPGAORYJXCHCGKECLJVRBPRLHORREEIZOBSHDSCETTTNFTSMQPQIJBLKNZDMXOTRBNMTKHHCZQQMSLOAXJQKRHDGZVGITHYGVDXRTVBJEAHYBYRYKJAVXPOKHFFMEPHAGFOOPFNKQAUGYLVPWUJUPCUGGIXGRAMELUTEPYILBIUOCKKUUBJROQFTXMZRLXBAMHSDTEKRRIKZUFNLGTQAEUINMBPYTWXULQNIIRXHHGQDPENXAJNWXULFBNKBRINUMTRBFWBYVNKNKDFR';
const needle = 'ABCDA'; // the order of letters doesn't matter

/**
 * Finds every shortest substring of `text` that contains all characters of
 * `required`, honouring duplicates (e.g. 'ABCDA' needs two As) but not order.
 *
 * Uses a sliding window: the right edge grows until the window covers all
 * required characters, then the left edge shrinks while it still does.
 *
 * @param {string} text - The string to search in.
 * @param {string} required - The characters every result must contain.
 * @returns {string[]} All windows of minimal length, ordered by start position;
 *   an empty array when no window exists or `required` is empty.
 */
function findShortestSubstrings(text, required) {
  // An empty requirement has no meaningful window; bail out instead of looping.
  if (required.length === 0) {
    return [];
  }

  // How many more of each required character the current window still needs.
  // A negative value means the window holds surplus copies of that character.
  const remaining = new Map();
  required.split('').forEach((ch) => {
    remaining.set(ch, (remaining.get(ch) || 0) + 1);
  });

  let missing = required.length; // required characters not yet inside the window
  let left = 0;
  let bestLength = Infinity;
  let best = [];

  for (let right = 0; right < text.length; right++) {
    const entering = text[right];
    if (remaining.has(entering)) {
      const count = remaining.get(entering);
      if (count > 0) {
        missing--;
      }
      remaining.set(entering, count - 1);
    }

    // While the window is valid, record it and try to shrink it from the left.
    while (missing === 0) {
      const length = right - left + 1;
      if (length < bestLength) {
        bestLength = length;
        best = [text.slice(left, right + 1)];
      } else if (length === bestLength) {
        best.push(text.slice(left, right + 1));
      }

      const leaving = text[left];
      if (remaining.has(leaving)) {
        const count = remaining.get(leaving) + 1;
        remaining.set(leaving, count);
        if (count > 0) {
          missing++;
        }
      }
      left++;
    }
  }

  return best;
}

const shortestSubstrings = findShortestSubstrings(haystack, needle);

console.log(shortestSubstrings);

答案 3 :(得分:0)

刚刚在面试中作为编码任务遇到了这个问题,并想出了另一个解决方案(它不如上面的那个高效,但也许更容易理解)。

/**
 * Returns the shortest substring of strArr[0] that contains every character of
 * strArr[1], counting duplicates but ignoring order.
 *
 * Implemented as a sliding window: the right edge advances until the window
 * covers all required characters, then the left edge advances while it still
 * does. When several windows share the minimal length, the leftmost wins.
 *
 * @param {string[]} strArr - Two-element array: [text to search, required characters].
 * @returns {string} The leftmost shortest window, or '' when no window exists
 *   (including when the required characters are empty).
 */
function MinWindowSubstring(strArr) {
  const [text, required] = strArr;

  if (required.length === 0) {
    return '';
  }

  // How many more of each required character the current window still needs.
  // A negative value means the window holds surplus copies of that character.
  const remaining = new Map();
  required.split('').forEach((character) => {
    remaining.set(character, (remaining.get(character) || 0) + 1);
  });

  let missing = required.length; // required characters not yet inside the window
  let left = 0;
  let bestStart = -1;
  let bestLength = Infinity;

  for (let right = 0; right < text.length; right++) {
    const entering = text[right];
    if (remaining.has(entering)) {
      const count = remaining.get(entering);
      if (count > 0) {
        missing--;
      }
      remaining.set(entering, count - 1);
    }

    // The window is complete: record it, then shrink from the left. Checking
    // right after the entering character is counted also covers windows that
    // consist of a single character.
    while (missing === 0) {
      const length = right - left + 1;
      // Strict comparison keeps the leftmost window on ties.
      if (length < bestLength) {
        bestLength = length;
        bestStart = left;
      }

      const leaving = text[left];
      if (remaining.has(leaving)) {
        const count = remaining.get(leaving) + 1;
        remaining.set(leaving, count);
        if (count > 0) {
          missing++;
        }
      }
      left++;
    }
  }

  return bestStart === -1 ? '' : text.slice(bestStart, bestStart + bestLength);
}