我想通过 pyrouge 评估我的摘要。pyrouge 可以在 GitHub 上获取(https://github.com/bheinzerling/pyrouge),它是 ROUGE 摘要评估包的 Python 封装器。我按顺序执行了以下命令:
git clone https://github.com/bheinzerling/pyrouge
cd pyrouge
python setup.py install
pyrouge_set_rouge_path /absolute/path/to/ROUGE-1.5.5/directory
python -m pyrouge.test
问题出现在我使用 pyrouge 评估摘要的时候。我编写了以下代码:
from pyrouge import Rouge155
r = Rouge155()
r.system_dir = "/home/afsharizadeh/Desktop/summarization/summarization_dataset/DUC_2007/2007/all_sum/system_sum/"
r.model_dir = "/home/afsharizadeh/Desktop/summarization/summarization_dataset/DUC_2007/2007/all_sum/ref_sum/"
r.system_filename_pattern = 'sum.(\d+).txt'
r.model_filename_pattern = 'sum.[A-Z].#ID#.txt'
output = r.convert_and_evaluate()
print(output)
output_dict = r.output_to_dict(output)
但是我收到了这个错误:
<pre>
--------------------------------------------------------------------------- UnicodeDecodeError Traceback (most recent call last) <ipython-input-8-b3bc5a66e7f0> in <module>()
6 r.model_filename_pattern = 'sum.[A-Z].#ID#.txt'
7
----> 8 output = r.convert_and_evaluate()
9 print(output)
10 output_dict = r.output_to_dict(output)
/home/afsharizadeh/anaconda3/lib/python3.6/site-packages/pyrouge/Rouge155.py in convert_and_evaluate(self, system_id, split_sentences, rouge_args)
358 if split_sentences:
359 self.split_sentences()
--> 360 self.__write_summaries()
361 rouge_output = self.evaluate(system_id, rouge_args)
362 return rouge_output
/home/afsharizadeh/anaconda3/lib/python3.6/site-packages/pyrouge/Rouge155.py in __write_summaries(self)
487 def __write_summaries(self):
488 self.log.info("Writing summaries.")
--> 489 self.__process_summaries(self.convert_summaries_to_rouge_format)
490
491 @staticmethod
/home/afsharizadeh/anaconda3/lib/python3.6/site-packages/pyrouge/Rouge155.py in __process_summaries(self, process_func)
481 "model files to {}.".format(new_system_dir, new_model_dir))
482 process_func(self._system_dir, new_system_dir)
--> 483 process_func(self._model_dir, new_model_dir)
484 self._system_dir = new_system_dir
485 self._model_dir = new_model_dir
/home/afsharizadeh/anaconda3/lib/python3.6/site-packages/pyrouge/Rouge155.py in convert_summaries_to_rouge_format(input_dir, output_dir)
200 """
201 DirectoryProcessor.process(
--> 202 input_dir, output_dir, Rouge155.convert_text_to_rouge_format)
203
204 @staticmethod
/home/afsharizadeh/anaconda3/lib/python3.6/site-packages/pyrouge/utils/file_utils.py in process(input_dir, output_dir, function)
27 input_file = os.path.join(input_dir, input_file_name)
28 with codecs.open(input_file, "r", encoding="UTF-8") as f:
---> 29 input_string = f.read()
30 output_string = function(input_string)
31 output_file = os.path.join(output_dir, input_file_name)
/home/afsharizadeh/anaconda3/lib/python3.6/codecs.py in read(self, size)
696 def read(self, size=-1):
697
--> 698 return self.reader.read(size)
699
700 def readline(self, size=None):
/home/afsharizadeh/anaconda3/lib/python3.6/codecs.py in read(self, size, chars, firstline)
499 break
500 try:
--> 501 newchars, decodedbytes = self.decode(data, self.errors)
502 except UnicodeDecodeError as exc:
503 if firstline:
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe9 in position 947: invalid continuation byte
</pre>
我该怎么解决这个 UnicodeDecodeError?