""" 这段代码能得到我需要的结果,但我需要在不使用 split 的情况下实现同样的功能。我们的教授只教过我们如何使用 split 和 strip,但他希望我们不要使用 split。 """
def main():
    """Ask the user for a file name and report the words found in that file."""
    requested_path = input('file name')
    countWords(requested_path)
def countWords(textFileName):
    """Print every whitespace-separated word of a text file, then the word count.

    Args:
        textFileName: Path of the text file to read.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    count = []
    # One handle managed by ``with`` replaces the two open() calls that were
    # never closed, so the file is released even if reading fails.
    with open(textFileName) as text_file:
        for line in text_file:
            count.extend(line.split())
    print(count)
    print(len(count))
# Run the word counter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
答案 0 :(得分:2)
使用 re.findall 函数(需要先 import re)。
count.extend(re.findall(r'\S+', line))
\S+ 匹配一个或多个非空白字符。
示例:
>>> s = "foo bar buzz"
>>> s.split()
['foo', 'bar', 'buzz']
>>> re.findall(r'\S+', s)
['foo', 'bar', 'buzz']
答案 1 :(得分:0)
您可以使用 itertools.groupby 来实现这个目的。如果您的教授有函数式语言的背景,他一定会欣赏这种写法:
>>> import itertools
>>> line = "abc def ghi"
>>> ["".join(token)
for space, token in itertools.groupby(line, key=lambda c: c.isspace())
if not space]
# `if not space`: keep only non-space sequences in the result.
# `itertools.groupby(...)`: group together consecutive chars based on the
# result of `isspace`.
['abc', 'def', 'ghi']
答案 2 :(得分:0)
您可以制作自定义拆分方法:
class Str(str):
    """A ``str`` subclass whose ``split`` is written by hand.

    The method follows the semantics of the built-in ``str.split`` without
    delegating to it.
    """

    def split(self, sep=None, maxsplit=-1):
        """Return the list of substrings of this string delimited by *sep*.

        Args:
            sep: Delimiter string of any non-zero length. When ``None``, runs
                of whitespace act as a single delimiter and no empty strings
                are produced.
            maxsplit: Maximum number of splits to perform; a negative value
                means no limit. The unsplit remainder becomes the last item.

        Returns:
            A list of strings.

        Raises:
            ValueError: If *sep* is an empty string.
        """
        if sep is None:
            return self._split_on_whitespace(maxsplit)
        if len(sep) == 0:
            raise ValueError("empty separator")
        return self._split_on_separator(sep, maxsplit)

    def _split_on_whitespace(self, maxsplit):
        """Split on runs of whitespace, never producing empty strings."""
        pieces = []
        pos = 0
        end = len(self)
        while maxsplit < 0 or len(pieces) < maxsplit:
            while pos < end and self[pos].isspace():
                pos += 1
            if pos == end:
                return pieces
            start = pos
            while pos < end and not self[pos].isspace():
                pos += 1
            pieces.append(self[start:pos])
        # Split budget exhausted: keep the rest intact, minus leading whitespace.
        while pos < end and self[pos].isspace():
            pos += 1
        if pos < end:
            pieces.append(self[pos:end])
        return pieces

    def _split_on_separator(self, sep, maxsplit):
        """Split on each occurrence of *sep*, keeping empty fields."""
        pieces = []
        start = 0
        while maxsplit < 0 or len(pieces) < maxsplit:
            hit = self.find(sep, start)
            if hit == -1:
                break
            pieces.append(self[start:hit])
            start = hit + len(sep)
        # Whatever follows the last consumed separator is the final field,
        # even when it is empty (e.g. a trailing separator).
        pieces.append(self[start:])
        return pieces
然后使用它:
my_str = Str("Hello, How are you, List Item here, Index blah, Gonna split")
print(my_str.split())
# Output:
['Hello,', 'How', 'are', 'you,', 'List', 'Item', 'here,', 'Index', 'blah,', 'Gonna', 'split']
如果不想使用类,也可以把它写成一个函数:
def split(str_to_split, sep=None, max_split=-1):
    """Split a string into a list of substrings without using ``str.split``.

    Args:
        str_to_split: The string to split.
        sep: Delimiter string of any non-zero length. When ``None``, runs of
            whitespace act as a single delimiter and no empty strings are
            produced.
        max_split: Maximum number of splits to perform; a negative value means
            no limit. The unsplit remainder becomes the last item.

    Returns:
        A list of strings.

    Raises:
        ValueError: If *sep* is an empty string.
    """
    if sep is None:
        return _split_on_whitespace(str_to_split, max_split)
    if len(sep) == 0:
        raise ValueError("empty separator")
    return _split_on_separator(str_to_split, sep, max_split)


def _split_on_whitespace(text, limit):
    """Split *text* on runs of whitespace, never producing empty strings."""
    pieces = []
    pos = 0
    end = len(text)
    while limit < 0 or len(pieces) < limit:
        while pos < end and text[pos].isspace():
            pos += 1
        if pos == end:
            return pieces
        start = pos
        while pos < end and not text[pos].isspace():
            pos += 1
        pieces.append(text[start:pos])
    # Split budget exhausted: keep the rest intact, minus leading whitespace.
    while pos < end and text[pos].isspace():
        pos += 1
    if pos < end:
        pieces.append(text[pos:end])
    return pieces


def _split_on_separator(text, sep, limit):
    """Split *text* on each occurrence of *sep*, keeping empty fields."""
    pieces = []
    start = 0
    while limit < 0 or len(pieces) < limit:
        hit = text.find(sep, start)
        if hit == -1:
            break
        pieces.append(text[start:hit])
        start = hit + len(sep)
    # Whatever follows the last consumed separator is the final field,
    # even when it is empty (e.g. a trailing separator).
    pieces.append(text[start:])
    return pieces
答案 3 :(得分:0)
学习使用正则表达式将非常非常非常有用,所以一定要尝试并理解Avinash的例子。而itertools是一个非常强大的库。
另一种可能性是"".partition()
s = "foo bar fizz buzz"
l = []
# Peel one word off the front per iteration: partition() returns
# (text before the first space, the space itself, everything after it).
while True:
    res = s.partition(' ')
    # Leading or repeated spaces yield an empty head; skip it so that only
    # real words are collected.
    if res[0]:
        l.append(res[0])
    if not res[2]:
        break
    s = res[2]
for w in l:
    # print() call form, consistent with the Python 3 code elsewhere in the file.
    print(w)
这可以扩展到不止一行。