Python:在由电子书转换而来的 txt 文件中搜索关键字并进行替换。

时间:2014-07-16 00:51:04

标签: python

我有一本约 400 页的书,已转换为 txt 文件。我想遍历大约 1000 个关键字,并在书中每个关键字出现的位置前加上前缀。但在对整本书进行搜索时遇到了麻烦。

    # Load the keyword list: list.txt is split on "\n", one keyword per line.
    lines = open('list.txt', 'r').read().split("\n")

    # Read the whole of in.txt into memory; `inpot` is not used again in this
    # snippet.
    inpot = open('in.txt').read()
    # NOTE(review): mode 'w' truncates in.txt as soon as it is opened, so the
    # reads of in.txt inside wr() below presumably see an empty file -- confirm.
    fout = open('in.txt', 'w')

    def wr(lines):
        # Despite its name, the parameter `lines` is a single keyword string
        # (see the call site below); it shadows the module-level list.
        # For every line of in.txt that contains the keyword, write that line
        # to `fout` with each occurrence prefixed by "$".  Lines that do not
        # contain the keyword are not written at all.
        with open('in.txt', 'r') as inF:
            for line in inF:
                if lines in line:
                     fout.write(line.replace(lines, "$"+lines))


    # Run one full pass over in.txt per keyword.
    for i in range(len(lines)):
        wr(lines[i])

2 个答案:

答案 0 :(得分:0)

我认为这是你想要实现的目标:

import os
import re

# Get keywords: one per line of list.txt.  Surrounding whitespace (including a
# stray '\r') is stripped and blank lines are dropped, because an empty
# alternative in the pattern would match at every position and put a '$'
# between all characters of the book.
with open('list.txt', 'r') as f:
    keywords = [k.strip() for k in f.read().split("\n") if k.strip()]

# Escape each keyword so regex metacharacters (., +, ?, parentheses, ...) are
# matched literally.  Alternation tries alternatives left to right, so longer
# keywords go first; otherwise a keyword that is a prefix of another
# ("data" vs "database") would pre-empt the longer match.
rekeywords = '(' + '|'.join(
    re.escape(k) for k in sorted(keywords, key=len, reverse=True)) + ')'

# With no keywords at all the pattern would be '()', which matches everywhere,
# so the file is only rewritten when there is something to look for.
if keywords:
    # Compile once instead of handing the pattern string to re.sub per line.
    keyword_re = re.compile(rekeywords)

    # Write new file with '$' in front of keywords.
    with open('in.txt', 'r') as f_in, open('out.txt', 'w') as f_out:
        for line in f_in:
            f_out.write(keyword_re.sub(r'$\1', line))

    # Replace old file with new.  The old file is removed first because
    # os.rename() does not overwrite an existing target on Windows.
    os.remove('in.txt')
    os.rename('out.txt', 'in.txt')

答案 1 :(得分:0)

import re

def replace_keywords(input_file, keywords):
    """Yield each line of *input_file* with every keyword match prefixed by '$'.

    Args:
        input_file: An iterable of text lines, for example an open text file.
        keywords: An iterable of compiled regular expressions, one per
            keyword.

    Yields:
        The input lines in their original order.  Line endings are left
        untouched; only a '$' is inserted in front of each match.

    Note:
        The patterns are applied one after another, so a piece of text that
        is matched by several patterns receives one '$' per matching pattern.
    """
    # Materialise once: the patterns are reused for every line, which would
    # exhaust a one-shot iterator after the first line.
    patterns = list(keywords)
    for line in input_file:
        for pattern in patterns:
            # '\g<0>' stands for the whole match; '$' has no special meaning
            # in a replacement template, so it is emitted literally.
            line = pattern.sub(r'$\g<0>', line)
        yield line

# Build one case-insensitive, whole-word pattern per keyword in lines.txt.
# Each line is stripped first (iterating a file keeps the trailing newline,
# which would otherwise become part of the pattern), blank lines are skipped,
# and the keyword is escaped so regex metacharacters are matched literally.
# NOTE: '\b' only matches next to a word character, so a keyword that starts
# or ends with punctuation (e.g. "C++") will not be found by these patterns.
with open('lines.txt', 'r') as f:
    regex_keywords = [
        re.compile('\\b{0}\\b'.format(re.escape(k.strip())), re.I)
        for k in f
        if k.strip()
    ]

# Stream the processed lines straight into output.txt.  The lines yielded by
# replace_keywords() still carry their own line endings, so they are written
# as-is; joining them with '\n' would double every newline.
with open('output.txt', 'w') as output:
    with open('input.txt', 'r') as source:
        output.writelines(replace_keywords(source, regex_keywords))