打开并读取多个文本文件并匹配单词

时间:2019-09-15 10:26:02

标签: python

如何编写一个脚本,读取两个文本文件,并打印出第二个文件中与第一个文件相匹配的单词?

下面的代码是我目前做到的程度:它可以匹配字符串中的单词并打印出来,但我需要它能够读取两个或更多的大型文本文件,并打印出其中相同(匹配)的单词。谢谢。

import re

def get_words_from_string(s):
    """Return the set of distinct lower-cased words found in *s*.

    A "word" is any maximal run of ``\\w`` characters (letters, digits and
    underscore), so punctuation and whitespace act as separators.

    Args:
        s: The text to scan.

    Returns:
        A set of lower-cased words; empty when *s* contains no word characters.
    """
    # Raw string: '\w' in a plain literal is an invalid escape sequence and
    # triggers a warning on modern Python versions.
    return set(re.findall(r'\w+', s.lower()))

def get_words_from_file(fname):
    """Return the set of distinct lower-cased words contained in a text file.

    The file is opened in text mode (platform default encoding) and scanned
    one line at a time, so large files are never loaded into memory at once.

    Args:
        fname: Path of the text file to read.

    Returns:
        A set with every word found in the file, as produced by
        get_words_from_string().
    """
    words = set()
    # Text mode is required: get_words_from_string() applies a str regex, and
    # feeding it the bytes returned by an 'rb' handle raises TypeError.
    with open(fname) as inf:
        # Words never span a line break, so per-line extraction gives the
        # same result as scanning the whole file content in one go.
        for line in inf:
            words |= get_words_from_string(line)
    return words

def all_words(needle, haystack):
    """Return True when every item of *needle* also occurs in *haystack*."""
    wanted = set(needle)
    available = set(haystack)
    return wanted <= available

def any_words(needle, haystack):
    """Return the set of items that occur in both *needle* and *haystack*.

    The result is empty (and therefore falsy) when nothing is shared.
    """
    wanted = set(needle)
    available = set(haystack)
    return wanted & available

# Demo: build the word sets for a short query and for a longer sentence.
search_words = get_words_from_string("this my test")
find_in = get_words_from_string("If this were my test, I is passing")

# NOTE(review): only the query words are printed; find_in is not used anywhere
# in the visible code, so no matching is actually performed here.
print (search_words)

2 个答案:

答案 0 :(得分:1)

这段代码还可以用列表推导式进一步精简,不过现在这样已经能够完成任务。

import os

def get_words(filename):
    """Return the distinct whitespace-separated words of a text file.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of the unique words, in order of first appearance. Words are
        taken verbatim (case and punctuation are kept).
    """
    # A dict keeps insertion order and gives O(1) duplicate detection, unlike
    # the quadratic "if item not in list" check on a growing list.
    seen = {}
    with open(filename) as fp:
        for line in fp:
            # split() with no argument already ignores leading/trailing
            # whitespace; keys that are already present keep their position.
            seen.update(dict.fromkeys(line.split()))
    return list(seen)

def find_common_words(filename1, filename2):
    """Print and return the words that occur in both text files.

    Args:
        filename1: Path of the first text file.
        filename2: Path of the second text file.

    Returns:
        The set of words present in both files. The same set is also printed
        to standard output.
    """
    # get_words() yields each file's distinct words; intersecting the two
    # sets keeps only the words the files share.
    matching_words = set(get_words(filename1)) & set(get_words(filename2))
    print(matching_words)
    return matching_words

def testit():
    """Run find_common_words() on the sample files stored beside this script."""
    # Resolve the sample files relative to this script instead of calling
    # os.chdir(), which would change the working directory of the whole process.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    filename1 = os.path.join(script_dir, 'words1.txt')
    filename2 = os.path.join(script_dir, 'words2.txt')
    find_common_words(filename1, filename2)

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    testit()

答案 1 :(得分:0)

使用reduce对komeil的答案进行了一些修改。您也可以尝试一下。

import os
from functools import reduce


def testit():
    os.chdir(os.path.abspath(os.path.dirname(__file__)))
    filename1 = 'words1.txt'
    filename2 = 'words2.txt'
    wordlist1 = set(map(str.strip, reduce(lambda x,y: x.extend(y) or x, map(str.split, open(filename1)))))
    wordlist2 = set(map(str.strip, reduce(lambda x,y: x.extend(y) or x, map(str.split, open(filename2)))))
    print(wordlist1 & wordlist2)


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    testit()