就地进行字符串操作以进行字符计数

时间:2018-10-27 09:48:39

标签: string algorithm in-place

给出一个字符串,用立即数替换每个字符。 例如:“ aaabbbcc”-“ a3b3c2”       “ abab”-“ a1b1a1b1”

3 个答案:

答案 0 :(得分:0)

您可以执行以下操作

function getResult(input) {
  if(!input) {
   return '';                      // if string is empty, return ''
  }
  if (input.length === 1) {        // if string length is 1, return input + '1'
   return input + '1'
  }

  var inputLength = input.length; // total length of the input string
  var preChar = input[0];         // track previous character
  var count = 1;                  // count of the previous character

  for (var i = 1; i < inputLength; i++) {

    if (preChar === input[i]) {  // previous char and current char match
      preChar = input[i];        // update prevChar with current char
      count++;                   // count of prevChar is incremented
      if (i + 1 < inputLength) { // not the end of the string 
        input = input.substr(0, i) + '#' + input.substr(i + 1); // update with dummy char '#' to maintain the length of the string, later will be removed
      } else {                   // if it is the end of the string, update with count
        input = input.substr(0, i) + count + input.substr(i + 1);
      }
      continue;
    }

    // if current char is not matched with prevChar
    preChar = input[i];             // update the preChar with current char
     
    // update the input with prevChar count and current char
    input = input.substr(0, i) + count + input[i] + input.substr(i + 1); 
    inputLength++;                 // new count is added in the string, so total length of the string is increased by one
    i++;                           // increment the i also because total length is increased
    count = 1;                     // update the count with 1
    if (i + 1 === inputLength) {       // if it is last element     
      input = input.substr(0, i+1) + count + input.substr(i + 2); 
    }
  }
  input = input.replace(/\#/g, '') // remove dummy char '#' with ''
  return input;                    // return the input
}

console.log("result for aaabbbcc is ", getResult('aaabbbcc'))
console.log("result for abab is ", getResult('abab'))

答案 1 :(得分:0)

此答案假设OP使用ASCII

OP需求分析

所需字节的分析:

  1. 只有1个重复时,需要2个字节。

  2. 当有2个重复时,需要2个字节。

  3. 当有3个字节时,需要2个字节。

  4. 当有9个字节时,为2个字节。

  5. 当有99个字节时,为3个字节。

  6. 当有999个时,为4个字节。

  7. 当重复(2 ^ 64-1)次时,仅需要20个字节。

如您所见,表示整数所需的字节为log10(N)(至少1),其中N是重复次数。

重复一次

因此,当只有1个重复并且没有可用空间时,它必须保持迭代直到

  1. 具有比所需更多字节的序列(至少重复3次)以为其留有空间

  2. reallocate结束时 (在一个字符串中,大多数字符串至少重复两次或三次,而那些仅重复一次的字符串不会聚集在一个地方,这很少是必需的。此算法进行此假设)。

然后,将向后修改字符串以防止任何数据覆盖。之所以起作用,是因为对于每个重复一次的序列,它将占用双字节来存储它们,因此,当i处的序列将被写入2i时。

C++14代码

#include <algorithm>

#include <cassert>
#include <utility>
#include <type_traits>

// Definition of helper functions for converting ints to string(raw and no need for format string).

template <class size_t, class UnsignedInt,
          class = std::enable_if_t<std::is_unsigned<UnsignedInt>::value>>
size_t to_str_len(UnsignedInt i) noexcept
{
    size_t cnt = 0;

    do {
        ++cnt;
        i /= 10;
    } while (i > 0);

    return cnt;
}

/*
 * out should either points to the beginning of array   or the last.
 * step should be either 1 or -1.
 */
template <class RandomAccessIt, class UnsignedInt,
          class = std::enable_if_t<std::is_unsigned<UnsignedInt>::value>>
auto uitos_impl(RandomAccessIt out, int step, UnsignedInt i) noexcept
{
    do {
        *out = static_cast<char>(i % 10) + 48;
        out += step;
        i /= 10;
    } while (i > 0);

    return out;
}

template <class BidirectionalIt>
void reverse_str(BidirectionalIt beg, BidirectionalIt end) noexcept
{
    do {
        std::iter_swap(beg++, --end);
    } while (beg < end);
}

template <class RandomAccessIt, class UnsignedInt>
auto uitos(RandomAccessIt beg, UnsignedInt i) noexcept
{
    auto ret = uitos_impl(beg, 1, i);

    // Reverse the string to get the right order
    reverse_str(beg, ret);

    return ret;
}

template <class RanIt, class UnsignedInt>
auto r_uitos(RanIt last, UnsignedInt i) noexcept
{
    return uitos_impl(last, -1, i);
}

template <class size_t, class RandomAccessIt>
size_t count_repeat(RandomAccessIt beg, RandomAccessIt end) noexcept
{
    auto first = beg;
    auto &val = *beg;
    do {
        ++beg;
    } while (beg != end && *beg == val);

    return beg - first;
}

template <class size_t, class RandomAccessIt>
size_t r_count_repeat(RandomAccessIt last) noexcept
{
    auto it = last;
    auto &val = *it;
    do {
        --it;
    } while (*it == val);

    return last - it;
}

template <class string,
          class size_type = typename string::size_type>
struct character_count
{
    static_assert(std::is_unsigned<size_type>::value,
                     "size_type should be unsigned");
    static_assert(!std::is_empty<size_type>::value,
                     "size_type should not be empty");

private:
    using str_size_t = typename string::size_type;
    using str_it = typename string::iterator;

    static_assert(std::is_same<typename string::value_type, char>::value,
                     "Only support ASCII");
    static_assert(sizeof(size_type) <= sizeof(str_size_t),
                     "size_type should not be bigger than typename string::size_type");


    string str;

    str_it in;
    str_it out;
    str_it end;
    size_type len;

    void cnt_repeat() noexcept
    {
        assert(in != end);
        len = count_repeat<size_type>(in, str.end());
    }

    void advance_in() noexcept
    {
        assert(in != end);
        in += len;
        assert(in <= end);
    }

    void process_impl()
    {
        assert(in != end);
        assert(out <= in);

        // The main loop
        do {
            cnt_repeat();
            if (len > 1 || out < in) {
                *out = *in;
                out = uitos(out + 1, len);

                advance_in();

                assert(out <= in);
            } else {
                auto first = find_enough_space();
                auto deduced_first = write_backward();

                assert(first == deduced_first);
            }
        } while (in != end);
    }

    auto find_enough_space()
    {
        assert(out == in);

        auto first = in;
        size_type bytes_required = 0;

        do {
            cnt_repeat();

            advance_in();
            bytes_required += to_str_len<size_type>(len) + 1;

            if (size_type(in - first) >= bytes_required) {
                out = first + bytes_required;
                return first;
            }
        } while (in != str.end());

        auto first_offset = first - str.begin();
        // Hopefully this path won't be executed.
        auto new_size = first_offset + bytes_required;

        auto original_size = str.size();
        assert(new_size > original_size);

        str.resize(new_size);

        first = str.begin() + first_offset;
        out = str.end();
        in = str.begin() + original_size;
        end = str.begin() + original_size;

        return first;
    }

    auto write_backward() noexcept
    {
        auto original_out = out--;
        auto original_in = in--;

        do {
            len = r_count_repeat<size_type>(in);

            out = r_uitos(out, len);
            *out-- = *((in -= len) + 1);
        } while (out != in);

       auto ret = out + 1;

       out = original_out;
       in = original_in;

       return ret;
    }

public:
    character_count(string &&arg):
        str(std::move(arg)), in(str.begin()), out(str.begin()), end(str.end())
    {}

    character_count(const string &arg):
        str(arg), in(str.begin()), out(str.begin()), end(str.end())
    {}

    /*
     * ```str``` should not be empty and should not have non-visible character
     */
    auto& process()
    {
        assert(!str.empty());
        process_impl();

        str.erase(out, str.end());

        return *this;
    }

    auto& get_result() & noexcept
    {
        return str;
    }
    auto&& get_result() && noexcept
    {
        return std::move(str);
    }
    auto& get_result() const& noexcept
    {
        return str;
    }

    /*
     * ```str``` should not be empty and should not have non-visible character
     */
    auto& set_string(const string &arg) noexcept
    {
        str = arg;

        in = str.begin();
        out = str.begin();
        end = str.end();

        return *this;
    }
    /*
     * ```str``` should not be empty and should not have non-visible character
     */
    auto& set_string(string &&arg) noexcept
    {
        str = std::move(arg);

        in = str.begin();
        out = str.begin();
        end = str.end();

        return *this;
    }
};

答案 2 :(得分:-1)

假设您正在使用Javascript,这就是我要做的。只需将所有组与正则表达式匹配并遍历它们即可:

function charCount (str) {
  const exp = /(\w)\1*/g
  let matches = (str).match(exp)
  let output = ''

  matches.forEach(group => {
    output += group[0] + group.length
  })

  return output
}

const str = 'abab'
const str2 = 'aaabbbcc'

console.log(charCount(str))   // a1b1a1b1
console.log(charCount(str2))  // a3b3c2