将字符串 "aaa" 编码为 "3[a]"

时间:2016-11-17 01:12:23

标签: string algorithm

给出一个字符串 s,按如下格式编码:"aaa" 编码为 "3[a]"。编码后的字符串长度应当最短。例如:"abbabb" 编码为 "2[a2[b]]"。

更新:假设字符串只包含小写字母

更新:这是我用 C++ 写的代码,但速度很慢。我知道其中一项改进是使用 KMP 算法来判断当前字符串是否由某个子串重复拼接而成。

// Reports whether s[start..end] consists entirely of back-to-back copies of the
// pattern s[l..r] (all four bounds are inclusive indices into s).
// Returns false immediately when the pattern length does not evenly divide the
// length of [start, end].
// A KMP-based period check over the whole string could replace this linear scan.
bool checkRepeating(string& s, int l, int r, int start, int end){
    const int unit = r - l + 1;
    const int span = end - start + 1;
    if (span % unit != 0) {
        return false;
    }
    // Walk the target range and the pattern in lockstep, wrapping the pattern
    // position every `unit` characters.
    for (int offset = 0; offset < span; ++offset) {
        if (s[l + offset % unit] != s[start + offset]) {
            return false;
        }
    }
    return true;
}

// Returns the number of decimal digits needed to print the repeat count
// l2 / l1 + 1, where l1 is the length of the repeating unit and l2 is the
// length of the text that follows its first occurrence. Requires l1 > 0.
// Digits are counted with integer division rather than log10, so the result
// cannot be thrown off by floating-point rounding at exact powers of ten.
int getLength(int l1, int l2){
    int count = l2 / l1 + 1;
    int digits = 1;
    while (count >= 10) {
        count /= 10;
        ++digits;
    }
    return digits;
}

// Returns the shortest encoding of s, where k consecutive copies of a piece may
// be written as k[piece] and pieces may nest. A wrapped or split form is chosen
// only when it is strictly shorter than the best candidate found so far.
//
// Interval DP over every substring s[j..i]: the best encoding is either the raw
// text, the concatenation of the best encodings of a prefix/suffix split, or a
// prefix repeated to fill the whole range. Answers are memoized by substring
// content in `record`, so text that re-occurs elsewhere in s is solved once.
//
// Relies on the helpers checkRepeating() and getLength() defined in this file.
string shortestEncodeString(string s){
    if (s.empty()) {
        return s;
    }
    const int len = static_cast<int>(s.length());

    // best[j][i]: length of the shortest encoding found for s[j..i]. Each cell
    // is written before any longer range reads it.
    vector< vector<int> > best(len, vector<int>(len, 0));
    unordered_map<string, string> record;

    for (int i = 0; i < len; i++) {
        // j runs downwards so every suffix s[k+1..i] is already solved when
        // s[j..i] needs it; prefixes s[j..k] were solved at a smaller i.
        for (int j = i; j >= 0; j--) {
            const int total = i - j + 1;
            const string piece = s.substr(j, total);

            // One lookup: reuse the stored answer if this text was seen before.
            const auto seen = record.find(piece);
            if (seen != record.end()) {
                best[j][i] = static_cast<int>(seen->second.size());
                continue;
            }

            int shortest = total;
            string ans = piece;
            for (int k = j; k < i; k++) {
                const int unit = k - j + 1;

                // Option 1: encode s[j..k] and s[k+1..i] independently.
                // The substrings are materialized only when the split wins.
                const int joined = best[j][k] + best[k + 1][i];
                if (shortest > joined) {
                    shortest = joined;
                    ans = record.at(s.substr(j, unit)) + record.at(s.substr(k + 1, i - k));
                }

                // Option 2: s[j..i] is s[j..k] repeated. Compare lengths first
                // (cheap) and run the linear repetition scan only if the
                // wrapped form would actually be shorter.
                const int wrapped = 2 + getLength(unit, i - k) + best[j][k];
                if (shortest > wrapped && checkRepeating(s, j, k, k + 1, i)) {
                    shortest = wrapped;
                    ans = to_string(total / unit) + '[' + record.at(s.substr(j, unit)) + ']';
                }
            }
            best[j][i] = shortest;
            record[piece] = ans;
        }
    }

    return record.at(s);
}

1 个答案:

答案 0 :(得分:0)

问题描述给出的信息很少,因此我用 JavaScript 快速做了一次尝试,因为它便于演示。注释写在代码里,基本思路是交替执行两个阶段:合并相邻元素、进行游程(run-length)检查,如此反复,直到只剩下一个元素,也就是最终的编码值。

quick algorithm sketch

我希望这会有所帮助。

&#13;
&#13;
// Heuristic encoder: repeatedly run-length-compresses a list of string chunks,
// starting from the individual characters of str. Whenever a compression pass
// leaves the chunk count unchanged, neighbouring chunks are concatenated in
// pairs before the next pass. Stops once at most one chunk remains and returns
// it (an empty input yields an empty string). Each pass is logged to the console.
function encode(str) {
  var pending = str.split('');
  var current = [];

  do {
    if (pending.length !== current.length) {
      // The previous pass changed the chunk count: feed its output back in as is.
      current = pending.slice();
    } else {
      // No progress last pass: glue neighbours together two at a time; an odd
      // trailing chunk is carried over unchanged.
      var paired = [];
      for (var p = 0; p < pending.length; p += 2) {
        paired.push(p + 1 < pending.length ? pending[p] + pending[p + 1] : pending[p]);
      }
      current = paired;
    }
    pending.length = 0;

    // Collapse each run of identical chunks into "count[chunk]".
    var pos = 0;
    while (pos < current.length) {
      var reps = runLength(current, pos);
      pending.push(reps > 1 ? reps + '[' + current[pos] + ']' : current[pos]);
      pos += reps;
    }
    console.log(pending);
  } while (pending.length > 1);

  return pending.join();
}

// Counts how many consecutive elements, beginning at index ind, are strictly
// equal to arr[ind]. The element at ind itself is always counted, so the
// result is at least 1.
function runLength(arr, ind) {
  var next = ind + 1;
  while (next < arr.length && arr[next] === arr[ind]) {
    next++;
  }
  return next - ind;
}
&#13;
<!-- Demo harness: clicking "Encode" calls encode() on the current value of
     #inp and writes the returned string into #result. -->
<input id='inp' value='abbabb'>
<button type="submit" onClick='javascript:document.getElementById("result").value=encode(document.getElementById("inp").value)'>Encode</button>
<br>
<input id='result' value='2[a2[b]]'>
&#13;
&#13;
&#13;