def hello_worddict(doc):
    """Count the words and the individual non-letter symbols in *doc*.

    A word is a run of alphabetic characters.  An apostrophe or hyphen
    that directly follows a letter is kept inside the word (``don't``,
    ``well-known``); if it ends up as the last character of the word it
    is split off again and counted as a symbol.  Every other
    non-whitespace character (digits, punctuation, ...) is counted one
    character at a time.  Whitespace only separates tokens and is never
    counted.

    Args:
        doc: The text to analyse.

    Returns:
        A dict mapping each word or symbol to its number of occurrences.
        An empty string yields an empty dict.
    """
    joiners = "'-"
    counts = {}

    def bump(token):
        counts[token] = counts.get(token, 0) + 1

    def flush(word):
        # A trailing apostrophe/hyphen is punctuation, not part of the word.
        while word and word[-1] in joiners:
            bump(word[-1])
            word = word[:-1]
        if word:
            bump(word)

    current = ""
    for char in doc:
        # Checking ``current`` first avoids indexing an empty string.
        joins_word = char in joiners and current and current[-1].isalpha()
        if char.isalpha() or joins_word:
            current += char
            continue
        flush(current)
        current = ""
        if not char.isspace():
            bump(char)
    flush(current)
    return counts
当这样调用时:
hello_worddict("Male, rool, rool, hello 'bro !!!!! \jKr22vy")
应该返回:
output = {"'": 1, 'vy': 1, '!': 5, '2': 2, 'bro': 1, 'rool': 2, 'Male': 1, 'hello': 1, 'Kr': 1, ',': 3}
答案 0 :(得分:1)
请记住,Python 是"自带电池"的(标准库非常丰富)。撇开示例字符串中反斜杠的问题不谈,下面是一个基于正则表达式分割的简单做法:
import re
import collections
def hello_worddict(doc):
    """Count the tokens in *doc* using a single regular-expression split.

    Tokens are, in order of preference: a run of digits, a run of ASCII
    letters, or any other single character.  Tokens consisting only of
    whitespace are discarded.  Note that consecutive digits form one
    token, so ``"22"`` is counted once as ``'22'``.

    Args:
        doc: The text to analyse.

    Returns:
        A ``collections.Counter`` mapping each token to its count.
    """
    # The capture group makes re.split() return the matched tokens too;
    # the pieces between matches are empty or whitespace-only and are
    # dropped by the strip() filter.
    return collections.Counter(
        w for w in re.split(r'(\d+|[a-zA-Z]+|.)', doc) if w.strip())