# Ask the user for the path of the poem file.
file_str = input("Enter poem: ")

# Read the whole file; the context manager closes the handle on exit.
with open(file_str, "r") as my_file:
    poem = my_file.read()

# The text to split is the file's contents, not the file name.
# str.split() takes a single separator, and the expression ``',' or ';'``
# evaluates to just ','. Normalise ';' to ',' first so both delimiters split.
words = poem.replace(';', ',').split(',')
我的计算机上有一个文件,其中包含一首非常长的诗。我想知道每一行中是否有重复的单词(所以需要先按标点符号拆分)。
目前我只写了上面这些。我不想使用模块或 Counter,更希望用循环来实现。有什么想法吗?
答案 0 :(得分:1)
您可以使用集合(set)来跟踪已经见过的单词和重复出现的单词:
# Track words already encountered (`seen`) and words already reported as
# duplicates (`dups`), so each repeated word is printed exactly once.
words = 'the fox jumped over the lazy dog and over the bear'.split()
seen = set()
dups = set()
for word in words:
    if word in seen:
        # Second or later occurrence: report it only the first time.
        if word not in dups:
            print(word)
            dups.add(word)
    else:
        seen.add(word)

# Output:
# the
# over
答案 1 :(得分:0)
# Report whether any whitespace-separated word occurs more than once in the file.
with open(r"specify the path of the file") as f:
    data = f.read()

# Work on words rather than on the characters of `data`; the file object
# itself has no count() method, so the words must be counted from the text.
words = data.split()

# A set drops repeats, so a shorter set means at least one word is duplicated.
if len(set(words)) != len(words):
    print("Duplicates found")
else:
    print("None")
答案 2 :(得分:0)
解决了!下面我用一个可以运行的程序来说明。
sam.txt 文件内容:

Hello this is abdul hello
the data are Hello so you can move to the hello
# Collect every word of sam.txt that occurs more than once, in order of
# first appearance, and print the result together with debug output.
file_content = []
resultant_list = []
repeated_element_list = []

with open(file="sam.txt", mode="r") as file_obj:
    file_content = file_obj.readlines()
print("\n debug the file content ",file_content)

for line in file_content:
    # split() without arguments drops the trailing newline and does not
    # produce empty tokens for blank lines or consecutive spaces.
    resultant_list.extend(line.split())
print("\n debug resultant_list",resultant_list)

# Main loop: compare each word against the words that follow it.
for ii, word in enumerate(resultant_list):
    # Compare the word itself (not its index) so a word that occurs three
    # or more times is still reported only once.
    if word in repeated_element_list:
        continue
    if word in resultant_list[ii + 1:]:
        repeated_element_list.append(word)

print("The repeated strings are {}\n and total counts {}".format(repeated_element_list, len(repeated_element_list)))
输出:
debug the file content ['Hello this is abdul hello\n', 'the data are Hello so you can move to the hello']
debug resultant_list ['Hello', 'this', 'is', 'abdul', 'hello', 'the', 'data', 'are', 'Hello', 'so', 'you', 'can', 'move', 'to', 'the', 'hello']
The repeated strings are ['Hello', 'hello', 'the']
and total counts 3
谢谢
答案 3 :(得分:-1)
def Counter(text):
    """Return a dict mapping each whitespace-separated word in *text* to its count.

    Words are compared exactly as they appear (case-sensitive, with any
    attached punctuation). An empty or whitespace-only *text* yields ``{}``.
    """
    d = {}
    for word in text.split():
        # dict.get supplies 0 the first time a word is encountered.
        d[word] = d.get(word, 0) + 1
    return d
使用循环 :/
按句子拆分文本,并对每个句子调用我们的函数:
import re

# Split the text into sentences at '!', '.' or '?' and print the word
# counts of each sentence.
# NOTE(review): `my_corpus` is not defined in this snippet; it is assumed to
# hold the poem text as a string. Confirm against the caller.
matches = re.split(r"[!.?]", my_corpus)
for match in matches:
    print(Counter(match))
答案 4 :(得分:-1)
对于这种文件;
A hearth came to us from your hearth
foreign hairs with hearth are same are hairs
下面的代码会检查整首诗:
# Print every word that has already appeared earlier anywhere in the poem.
# A word occurring n times is therefore printed n - 1 times.
lst = []  # words encountered so far, across all lines
with open("coz.txt") as f:
    for line in f:
        for word in line.split():  # split on whitespace
            if word not in lst:
                lst.append(word)
            else:
                print(word)
输出:
>>>
hearth
hearth
are
hairs
>>>
如你所见,这里输出了两个 hearth,因为整首诗中共有 3 个 hearth。
逐行检查;
# Collect the words that are repeated within a single line of the poem and
# print them as a set once the whole file has been read.
lst = []   # words encountered so far on the current line
lst2 = []  # words found more than once within one line
with open("coz.txt") as f:
    for line in f:
        # Start fresh for every line so the check really is per line.
        lst = []
        for word in line.split():
            if word in lst:
                # Record the repeat instead of removing items from a list
                # that is being iterated, which would skip elements.
                lst2.append(word)
            else:
                lst.append(word)
print(set(lst2))
>>>
{'hearth', 'are', 'hairs'}
>>>