我正在尝试使用从CMU ARK Twitter获得的训练集训练bigramtagger词性标签v0.3.2 http://www.ark.cs.cmu.edu/TweetNLP/训练集包含标记的推文但是当我尝试训练时我得到一个错误:
File "/Applications/PyCharm Edu.app/Contents/helpers/pycharm/pycharm_setup_runner.py", line 26, in <module>
exec (fh.read(), globals(), locals())
File "<string>", line 41, in <module>
File "/Users/ravinkohli/Library/Python/3.5/lib/python/site-packages/nltk/tag/sequential.py", line 341, in __init__
backoff, cutoff, verbose)
File "/Users/ravinkohli/Library/Python/3.5/lib/python/site-packages/nltk/tag/sequential.py", line 288, in __init__
self._train(train, cutoff, verbose)
File "/Users/ravinkohli/Library/Python/3.5/lib/python/site-packages/nltk/tag/sequential.py", line 179, in _train
tokens, tags = zip(*sentence)
ValueError: not enough values to unpack (expected 2, got 0)
我的代码是
from nltk import tag
import nltk
def read_from_file(file):
f = open(file, "r")
data = []
row = []
for line in f:
if line != "\n":
row.append(line)
elif line == "\n":
data.append(row)
row = []
list_item = []
data_final = []
for item in data:
for row1 in item:
row1 = row1.replace("\n","")
row1 = row1.split(" ")
list_item.append(row1)
data_final.append(list_item)
list_item=[]
return data_final
data = read_from_file("pos_training.txt")
list_item = []
final_tags = []
for row in data:
for item in row:
print(item)
tagged_token = tag.str2tuple(item[0] + "/" + item[1])
list_item.append(tagged_token)
final_tags.append(list_item)
list_item = []
bigram_tagger = nltk.BigramTagger(final_tags)