我有一个脚本来计算BLEU得分,另一个脚本则根据单词的POS标签执行相同的操作。当我将它们合并为一个代码时,创建的POS标记文件为空,并且代码崩溃导致出现此错误:
sentences=cube(sentences)
TypeError: 'NoneType' object is not callable
由于文件为空,所以我不确定为什么。
我尝试过将POS文件创建成一个函数。仍然不起作用。
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import SmoothingFunction
import argparse
import sys
import csv
from cube.api import Cube
cube=Cube(verbose=True)
cube.load("en")
def argparser():
Argparser = argparse.ArgumentParser()
Argparser.add_argument('--reference', type=str, default='summaries.txt', help='Reference File')
Argparser.add_argument('--candidate', type=str, default='candidates.txt', help='Candidate file')
args = Argparser.parse_args()
return args
args = argparser()
reference = open(args.reference, 'r').readlines()
candidate = open(args.candidate, 'r').readlines()
with open('pos_r.txt', 'w') as out1, open('pos_h.txt', 'w')as out2:
for sentences in reference:
sentences=cube(sentences)
for sentence in sentences:
for entry in sentence:
out1.write(entry.upos+"\t"+entry.attrs)
out1.write('')
for sentences in candidate:
sentences=cube(sentences)
for sentence in sentences:
for entry in sentence:
out2.write(entry.upos+"\t"+entry.attrs)
out2.write('')
with open('results.txt','w')as out:
### BLEU
cc=SmoothingFunction()
if len(reference) != len(candidate):
raise ValueError('The number of sentences in both files do not match.')
score = 0.
for i in range(len(reference)):
score += sentence_bleu([reference[i].strip().split()], candidate[i].strip().split(),smoothing_function=cc.method5)
score /= len(reference)
out.write('BLEU score is ' + '{:.3}'.format(score))
### POSBLEU
with open('pos_r.txt', 'r') as f1, open('pos_h.txt', 'r')as f2:
f1=f1.readlines()
f2=f2.readlines()
if len(f1) != len(f2):
raise ValueError('The number of sentences in both files do not match.')
score = 0.
for i in range(len(f1)):
score += sentence_bleu([f1[i].strip().split()], f2[i].strip().split(),smoothing_function=cc.method5)
score /= len(f1)
out.write('POSBLEU score is ' + '{:.3}'.format(score))