def split_on_separators(original, separators):
    """ (str, str) -> list of str
    Return a list of non-empty, non-blank strings from the original string
    determined by splitting the string on any of the separators.
    separators is a string of single-character separators.
    >>> split_on_separators("Hooray! Finally, we're done.", "!,")
    ['Hooray', ' Finally', " we're done."]
    """
    result = []
    current = ''
    for char in original:
        if char in separators:
            # A separator ends the current chunk. Keep it only if it is
            # non-empty (consecutive or leading separators yield empties).
            if current:
                result.append(current)
            current = ''
        else:
            current += char
    # The last chunk is terminated by end-of-string, not by a separator,
    # so it must be flushed after the loop (the original code dropped the
    # final character, or the whole trailing chunk, here).
    if current:
        result.append(current)
    return result
def average_sentence_length(text):
    """ (list of str) -> float
    Precondition: text contains at least one sentence. A sentence is defined
    as a non-empty string of non-terminating punctuation surrounded by
    terminating punctuation or beginning or end of file. Terminating
    punctuation is defined as !?.
    Return the average number of words per sentence in text. A word is a
    maximal run of non-whitespace characters.
    >>> average_sentence_length(['Hi there. How are you?'])
    2.5
    """
    # Work on the whole text at once: a sentence may span several list items,
    # so per-line processing (as the original attempted) cannot find them.
    full_text = ''.join(text)
    # Normalize every terminator to '.' so one split handles all three.
    for terminator in '!?':
        full_text = full_text.replace(terminator, '.')
    # Discard blank fragments (e.g. whatever follows the final terminator)
    # so they are not counted as zero-word sentences.
    sentences = [chunk for chunk in full_text.split('.') if chunk.strip()]
    # Count whitespace-separated words; split() ignores leading/trailing
    # whitespace and runs of whitespace, including newlines.
    total_words = sum(len(sentence.split()) for sentence in sentences)
    return total_words / len(sentences)
Answer 0 (score: 0)
Words can be counted by splitting each sentence on spaces and taking the length of the resulting list. Hope that helps.
Answer 1 (score: 0)
The need for the first function can be eliminated by splitting on the separators with a regular expression; the function for that is re.split(). Here is a cleaned-up version that produces the correct result:
import re
def average_sentence_length(text):
    """Return the average number of words per sentence in text (list of str).

    Sentences are delimited by the terminating punctuation characters !?.
    and may span multiple items of the list.
    """
    # Join all the text into one string and remove all newline characters.
    # Joining all text into one string allows us to find the sentences much
    # easier, since multiple list items in 'text' could be one whole sentence.
    text = "".join(text).replace('\n', '')
    # Use a regex to split the sentences at the delimiter characters !?.
    # Materialize the result as a list: in Python 3, filter() returns a lazy
    # iterator that has no len() and can be consumed only once, which would
    # make the final division fail. Empty fragments are dropped so they are
    # not counted as sentences.
    sentences = [s for s in re.split('[!?.]', text) if s]
    wordsum = 0.0
    for s in sentences:
        # split() with no argument splits on runs of whitespace and ignores
        # leading/trailing whitespace, so a fragment like ' To talk' does
        # not contribute a bogus empty "word" the way split(' ') did.
        wordsum += len(s.split())
    return wordsum / len(sentences)
# Sample data from the question. Note the leading space in the second item:
# without it, "said" and "To" would merge into one word once the newline
# characters are removed.
data = ['The time has come, the Walrus said\n',
        ' To talk of many things: of shoes - and ships - and sealing wax,\n',
        'Of cabbages; and kings.\n'
        'And why the sea is boiling hot;\n'
        'and whether pigs have wings.\n']
# print must be called as a function in Python 3 (the original used the
# Python 2 print statement, which is a syntax error in Python 3).
print(average_sentence_length(data))
One issue with this function is that, with the text exactly as given in the question, it returns 17.0 instead of 17.5. That happens because there is no space between "...the Walrus said" and "To talk...", so those two words merge into one when the newlines are removed. Short of adding the space that should have been there in the first place, there is not much that can be done about it.
If the project requires the first function (split_on_separators), it can be substituted for the re.split() call. However, using a regular expression is more reliable and more lightweight than writing an entire function for it.
Edit:
I forgot to explain the filter() function. Basically, when you pass None as its first argument, it takes the second argument and removes every "falsy" item from it. Since an empty string is treated as false in Python, the empty strings are removed. You can read more about filter() here.