游程编码程序

时间:2016-07-23 18:35:44

标签: python python-3.x

假设我有一个字符串“AAABBCCCDAAT”,期望得到的输出是 A3B2C3D1A2T1。

# Run-length encode a line read from standard input and print the result,
# e.g. "AAABBCCCDAAT" -> "A3B2C3D1A2T1".
data = input("enter data ")

# One "<char><count>" piece is collected per run of identical characters.
pieces = []
count = 0
for index, char in enumerate(data):
    count += 1
    # A run ends at the last character or when the next character differs;
    # only then is the piece emitted and the counter reset.
    if index + 1 == len(data) or data[index + 1] != char:
        pieces.append(char + str(count))
        count = 0
print("".join(pieces))

也就是说,对于字符串“AAABBCCCDAAT”,期望的输出是 A3B2C3D1A2T1。谢谢!

2 个答案:

答案 0 :(得分:2)

这是一种方法,使用itertools.groupby

In [62]: from itertools import groupby

In [63]: data
Out[63]: 'AAABBCCCDAAT'

In [64]: ''.join(char + str(len(list(grp))) for char, grp in groupby(data))
Out[64]: 'A3B2C3D1A2T1'

答案 1 :(得分:0)

我有几种方法可以分享,不过我认为 @Warren Weckesser 的答案更好。:-)

# Regular expression approach:

import re

def run_length_encode(text):
    """Return the run-length encoding of ``text``.

    Each maximal run of one repeated character is replaced by that
    character followed by the length of the run, so ``'AAAB'`` becomes
    ``'A3B1'``.  An empty string encodes to an empty string.
    """
    def encode_single_run(match):
        # For a match on 'AAA': group(1) is 'A' and group(0) is 'AAA',
        # so the run length is len(group(0)) == 3.
        return '{}{}'.format(match.group(1), len(match.group(0)))

    # Replace each character together with its immediate repetitions by the
    # output of encode_single_run.  re.DOTALL makes '.' match newlines as
    # well; without it newline characters would never match and would be
    # copied to the output unencoded.
    return re.sub(r'(.)\1*', encode_single_run, text, flags=re.DOTALL)

assert run_length_encode('AAABBCCCDAAT') == 'A3B2C3D1A2T1'


# While loop approach:

def run_length_encode(text):
    """Return the run-length encoding of ``text``.

    The input is scanned run by run: ``start`` marks the first index of the
    current run and ``end`` is pushed forward until the character changes
    or the input ends.  Each run contributes "<char><length>" to the output.
    """
    pieces = []
    total = len(text)
    start = 0

    while start < total:
        symbol = text[start]

        # Find the index just past the last repetition of symbol.
        end = start + 1
        while end < total and text[end] == symbol:
            end += 1

        # end - start is the number of times symbol occurred in this run.
        pieces.append('{}{}'.format(symbol, end - start))

        # The next run begins where this one stopped.
        start = end

    return ''.join(pieces)

assert run_length_encode('AAABBCCCDAAT') == 'A3B2C3D1A2T1'


# State machine approach:

def run_length_encode(text):
    """Return the run-length encoding of ``text``.

    Walks the input once, tracking the character of the run in progress
    and how many times it has been seen.  Each completed run contributes
    "<char><count>" to the output, so ``'AAAB'`` becomes ``'A3B1'``.
    An empty input yields an empty string.
    """
    result = []

    # Keeps track of the letter we're in the midst of a run of
    current_letter = None
    # Keeps track of how many times we've seen that letter
    count = 0

    # Iterate over each letter in the string
    for letter in text:

        # If it's the same letter we're already tracking
        if letter == current_letter:
            count += 1
        else:
            # Unless it's the very start of the process
            if count > 0:
                result.append((current_letter, count))

            # Start tracking the new letter
            current_letter = letter
            count = 1

    # Flush the final run.  The guard matters for empty input: no run was
    # ever started, and appending unconditionally would emit 'None0'.
    if count > 0:
        result.append((current_letter, count))

    # At this point, result looks like [('A', 3), ('B', 2), ...]
    # This next line combines the letters and counts and then puts everything into a single string
    return ''.join('{}{}'.format(letter, count) for letter, count in result)

assert run_length_encode('AAABBCCCDAAT') == 'A3B2C3D1A2T1'