句子与已标注语料库之间出现了问题，但我无法修复这段代码。下面是加载句子的代码：
def loadUntagged(fileName, encoding="utf-8"):
    """Load an untagged text file as a list of token lists.

    The file contents are passed through ``tokenize.blankline`` to obtain
    sentences, and every sentence through ``tokenize.whitespace`` to obtain
    its tokens.

    Args:
        fileName: Path of the text file to read.
        encoding: Encoding used to decode the file. Defaults to UTF-8 so the
            result does not depend on the platform's locale encoding.

    Returns:
        A list with one entry per sentence, each entry being the list of
        tokens of that sentence.
    """
    # The context manager closes the handle even if reading fails.
    with open(fileName, encoding=encoding) as corpusFile:
        text = corpusFile.read()

    # Bug fix: the original built this list but never returned it, so every
    # caller received None.
    return [
        list(tokenize.whitespace(sentence))
        for sentence in tokenize.blankline(text)
    ]
def loadCorpus(corpus):
    """Read a corpus file and tokenise it sentence by sentence.

    NOTE(review): only the beginning of this function is present in the
    excerpt. As shown, ``newSentences`` is never filled and nothing is
    returned; the remaining logic must be restored from the full source.

    Args:
        corpus: Path of the corpus file to read.
    """
    # The context manager closes the handle even if reading fails.
    with open(corpus) as corpusFile:
        text = corpusFile.read()

    # Bug fix: the original passed the undefined name ``self`` here, which
    # raises NameError in a plain function; the text just read is what has
    # to be split into sentences.
    sentences = list(tokenize.blankline(text))
    newSentences = []
    for sentence in sentences:
        tokens = list(tokenize.whitespace(sentence))
        # NOTE(review): the excerpt ends here; ``tokens`` is not yet used.
下面是生成 TaggedOutput 的代码：
# Tag the contents of test.csv and write the result to Tagged_bangla_hmm.csv.
# ``tagger`` and ``doTag`` are defined outside this section.

# Bug fix: the original passed 'UTF-8' and 'r' positionally. pandas does not
# read those as an encoding and a file mode (older versions take them as
# ``sep`` and ``delimiter``; newer versions reject extra positional
# arguments), so the encoding is now given by keyword.
# NOTE(review): doTag may expect the list of token lists that loadUntagged
# builds rather than a DataFrame -- confirm against doTag.
untagged = pd.read_csv('test.csv', encoding='UTF-8')
print('Tagging...')
taggedOutput = doTag(tagger, untagged)

# Bug fix: read_csv cannot open a file for writing, and the old inner loop
# enumerated the DataFrame's columns and called to_csv with a new file name
# on every iteration. The output file is now opened once for writing.
# Assumes each tagged sentence is an iterable of (word, tag) pairs -- TODO
# confirm against doTag's return value.
with open("Tagged_bangla_hmm.csv", 'w', encoding="utf-8") as tagged:
    for sentence in taggedOutput:
        for word, tag in sentence:
            # Whitespace-separated columns, matching the r'\s+' delimiter
            # the original used for this file.
            tagged.write(str(word) + '\t' + str(tag) + '\n')
        # A blank line keeps sentence boundaries visible in the output.
        tagged.write('\n')
        print(sentence)
        print('\n\n')
print('Finished Tagging')