我有一个像这样的字符串db.posts.aggregate([
{$sample:{"size": 50 }},
{$match: {"content": { $ne: "" }}},
{$group: {"_id" : "$_id", "post" : { "$push" : "$$ROOT" }}},
{$unwind : "$post" }
])
字符串为:str = "aabcccdfffeeeeettaaaattiioccc"
我需要这样的输出:
Result ={aa: 1;b:1;ccc:2;d:1;fff:1;eeeee:1;tt:2;aaaa:1;ii:1;o:1;ccc:1}
到目前为止,我已经自己尝试编写过代码(尝试的代码未在此处收录)。
答案 0 :(得分:1)
我会按照以下步骤操作:遍历字符串,把连续相同的字符切分成一个个片段,然后用 collections.Counter 统计每个片段出现的次数。
关于您的代码:条件 the_string[i] == the_string[i] 是把一个字符与它自身比较,因此将始终为真。
from collections import Counter
def repeating_letters(the_string):
    """Count the runs of consecutive identical characters in a string.

    A run is a maximal substring made of one repeated character, e.g.
    ``"aab"`` is split into the runs ``"aa"`` and ``"b"``.

    Args:
        the_string: The text to split into runs.

    Returns:
        A ``Counter`` mapping each run to the number of times it occurs.
        An empty input produces an empty ``Counter``.
    """
    if not the_string:
        # Without this guard the final slice below would record one
        # spurious empty run ('') for empty input.
        return Counter()
    partials = []
    initial = 0  # Index at which the current run starts.
    for i, character in enumerate(the_string):
        if character == the_string[initial]:
            continue
        # The character changed, so the run [initial, i) is complete.
        partials.append(the_string[initial:i])
        initial = i
    partials.append(the_string[initial:])  # Needed for the last partial string
    return Counter(partials)
正如 @prahantrana 在评论中提到的那样,可以使用 itertools 库中的 groupby 函数,用一行代码得到各个片段。
from collections import Counter
from itertools import groupby
def repeating_letters(the_string):
    """Count the runs of consecutive identical characters in a string.

    ``groupby`` yields one group per stretch of equal adjacent characters;
    joining a group rebuilds that stretch as a substring.

    Args:
        the_string: The text to split into runs.

    Returns:
        A ``Counter`` mapping each run (e.g. ``"aa"``) to the number of
        times it occurs. An empty input produces an empty ``Counter``.
    """
    return Counter(''.join(group) for _, group in groupby(the_string))
或
from collections import Counter
from itertools import groupby
def repeating_letters(the_string):
    """Count the runs of consecutive identical characters in a string.

    Each run is rebuilt by repeating the group's character as many times
    as the group has members, instead of joining the members.

    Args:
        the_string: The text to split into runs.

    Returns:
        A ``Counter`` mapping each run (e.g. ``"aa"``) to the number of
        times it occurs. An empty input produces an empty ``Counter``.
    """
    return Counter(
        # Count the group's members without materializing them in a list.
        char * sum(1 for _ in group)
        for char, group in groupby(the_string)
    )
我不确定其中哪个更快。
答案 1 :(得分:0)
from collections import Counter
from itertools import groupby
def splitter(text):
    """Count the runs of consecutive identical characters in ``text``.

    Args:
        text: The string to split into runs of one repeated character.

    Returns:
        A ``Counter`` mapping each run (e.g. ``"aaaa"``) to the number of
        times that exact run occurs in ``text``.
    """
    # groupby emits one group per stretch of equal adjacent characters;
    # joining the group turns it back into the substring for that stretch.
    return Counter(''.join(group) for _, group in groupby(text))
# Demo input: some runs (e.g. 'aaaa', 'fff') occur more than once.
l = 'aaaabcccdfffeeeeettfffaaaattiioccceeeeeeaaaa'
print(splitter(l))
输出
Counter({'aaaa': 3, 'ccc': 2, 'fff': 2, 'tt': 2, 'b': 1, 'd': 1, 'eeeee': 1, 'ii': 1, 'o': 1, 'eeeeee': 1})
另一种方式:手动编码实现,不使用 itertools(仍然使用 collections.Counter 进行计数)
from collections import Counter
def function(string):
    """Count the runs of consecutive identical characters in ``string``.

    The runs are found with a hand-written scan rather than
    ``itertools.groupby``.

    Args:
        string: The text to split into runs of one repeated character.

    Returns:
        A ``Counter`` mapping each run (e.g. ``"aaaa"``) to the number of
        times that exact run occurs. An empty input produces an empty
        ``Counter``.
    """
    if not string:
        return Counter()
    runs = []
    start = 0  # Index at which the current run begins.
    # Walk over adjacent character pairs; a mismatch marks a run boundary.
    for index, (previous, current) in enumerate(
        zip(string, string[1:]), start=1
    ):
        if current != previous:
            runs.append(string[start:index])
            start = index
    # The last run has no following boundary, so flush it explicitly.
    runs.append(string[start:])
    return Counter(runs)
# Demo input: some runs (e.g. 'aaaa', 'fff') occur more than once.
l ='aaaabcccdfffeeeeettfffaaaattiioccceeeeeeaaaa'
print(function(l))
输出
Counter({'aaaa': 3, 'ccc': 2, 'fff': 2, 'tt': 2, 'b': 1, 'd': 1, 'eeeee': 1, 'ii': 1, 'o': 1, 'eeeeee': 1})