我必须编写一个脚本来读取文件并设置标记<p></p>
和<w></w>
。然后,如果文本的单词与stopwords.txt中的单词匹配,则必须添加属性type =“ stopword”。但是我在for循环中遇到了上述错误,而且我似乎无法理解问题所在以及如何解决。
import re
from pathlib import Path
STOPWORD_FILE_PATH = Path("stopwords.txt")
def main():
while True:
user_input = input("Enter input file name or 'exit': ")
if user_input == "exit":
break
elif user_input:
input_file_path = Path(user_input)
if input_file_path.exists():
convert_file(input_file_path)
else:
print("File %s doesn't exist." % user_input)
def convert_file(input_file_path):
try:
with input_file_path.open(mode="r", encoding="utf-8") as input_file:
stopwords = import_stopwords()
converted_string = ""
for line in input_file:
converted_line = convert_line(line, stopwords)
if converted_line:
converted_string += converted_line
export_to_file(converted_string)
except IOError:
print("Couldn't open file.")
def convert_line(line, stopwords):
if len(line.strip()) == 0:
global converted_line
return converted_line
else :
marked_words = re.sub(r'(\w+)', r'<w>\1<w>', line)
marked_lines = print ('<p>'+ str(marked_words) + '<\p>')
for word in stopwords : #ERROR Here
if word == marked_lines:
set_attribute = re.sub(r'<w>', r'<w type="stopword">', stopwords)
return converted_line
pass
def import_stopwords():
try:
with STOPWORD_FILE_PATH.open(mode="r") as input_file:
content = input_file.read()
print(content)
except IOError:
empty_list=list()
print(empty_list)
return
pass
def export_to_file(converted_string):
while True:
user_input = input("Enter output file name or 'cancel': ")
if user_input == "cancel":
print("File export cancelled.")
break
elif user_input:
try:
output_file_path = Path(user_input)
with output_file_path.open(
mode="w", encoding="utf-8",
) as output_file:
output_file.write(converted_string)
print("File correctly exported.")
except IOError:
print("Couldn't save file %s." % user_input)
if __name__ == "__main__":
main()
这是完整的错误消息:
Traceback (most recent call last):
File "tp0312.py", line 120, in <module>
main()
File "tp0312.py", line 24, in main
convert_file(input_file_path)
File "tp0312.py", line 45, in convert_file
converted_line = convert_line(line, stopwords)
File "tp0312.py", line 69, in convert_line
for word in stopwords :
TypeError: 'NoneType' object is not iterable