我有以下代码,但不知道它为什么会失败。
# Count how often each (token, tag) pair occurs among the values of
# ``lemmatokentagdict``.
#
# NOTE: a dict holds each key exactly once, so if the same lemma was seen
# several times while this mapping was being built, only the last
# (token, tag) pair assigned to it survives. Every value in the literal
# below is distinct, so every count ends up as 1 and "doubled" is never
# printed. To obtain real frequencies, count over the original sequence of
# (token, tag) pairs (e.g. a list of tuples) instead of over this dict.

# Maps a (token, tag) pair to the number of times it was seen.
tokentagocc = {}

# Maps a lemma (stored as the string form of a list) to a (token, tag) pair,
# both of which are also stored as strings.
lemmatokentagdict = {
    "[',']": ("[',']", "['SYM.Pun.Comma']"),
    "['verursacht']": ("['verursachten']", "['ADJA.Pos.Nom.Pl.Masc']"),
    "['eine']": ("['Ein']", "['ART.Indef.Nom.Sg.Masc']"),
    "['Dollar']": ("['Dollar']", "['N.Reg.*2.*3.Masc']"),
    "['auf']": ("['auf']", "['APPR.Auf']"),
    "['Ausland']": ("['Ausland']", "['N.Reg.Acc.Sg.Neut']"),
    "['Soziale']": ("['Soziales']", "['N.Reg.Acc.Sg.Neut']"),
    "['Verkehr']": ("['Verkehr']", "['N.Reg.Acc.Sg.Masc']"),
    "['unterschlagen']": ("['unterschlagenen']", "['ADJA.Pos.Gen.Pl.Neut']"),
    "['rund']": ("['rund']", "['ADJD.Pos']"),
    "['staatlich']": ("['staatlichen']", "['ADJA.Pos.Gen.Pl.Neut']"),
    "['die']": ("['der']", "['ART.Def.Dat.Sg.Fem']"),
    "['alle']": ("['aller']", "['PRO.Indef.Attr.-3.Gen.Pl.Neut']"),
    "['für']": ("['für']", "['APPR.Acc']"),
    "['sie']": ("['sich']", "['PRO.Refl.Subst.3.Acc.Pl.*6']"),
    "['Milliarde']": ("['Milliarden']", "['N.Reg.Acc.Pl.Fem']"),
    "['in']": ("['ins']", "['APPRART.Acc.Sg.Neut']"),
    "['dadurch']": ("['dadurch']", "['PROADV.Dem']"),
    "['20']": ("['20']", "['CARD']"),
    "['weitergeleiten|weiterleiten']": ("['weitergeleitet']", "['VPP.Full.Psp']"),
    "['und']": ("['und']", "['CONJ.Coord.-2']"),
    '[]': ("['']", '[]'),
    "['Gesundheit']": ("['Gesundheit']", "['N.Reg.Acc.Sg.Fem']"),
    "['.']": ("['.']", "['SYM.Pun.Sent']"),
    "['Jahr']": ("['Jahr']", "['N.Reg.Dat.Sg.Neut']"),
    "['Geld']": ("['Gelder']", "['N.Reg.Gen.Pl.Neut']"),
    "['Rund']": ("['Rund']", "['ADJD.Pos']"),
    "['Schaden']": ("['Schäden']", "['N.Reg.Nom.Pl.Masc']"),
    "['Prozent']": ("['Prozent']", "['N.Reg.*2.*3.Neut']"),
    "['belaufen']": ("['beliefen']", "['VFIN.Full.3.Pl.Past.Ind']"),
    "['<unknown>']": ("['Verwaltungsmafia']", "['N.Reg.Dat.Sg.Fem']"),
    "['Teil']": ("['Teil']", "['N.Reg.Nom.Sg.Masc']"),
    "['von']": ("['von']", "['APPR.Dat']"),
    "['40']": ("['40']", "['CARD']"),
    "['werden']": ("['werde']", "['VFIN.Aux.3.Sg.Pres.Subj']"),
}

for tokentag in lemmatokentagdict:
    # Look the pair up once instead of three times per iteration.
    token_tag_pair = lemmatokentagdict[tokentag]
    print(token_tag_pair)
    if token_tag_pair in tokentagocc:
        tokentagocc[token_tag_pair] += 1
        print("doubled")
    else:
        tokentagocc[token_tag_pair] = 1
由于第一个和第二个字典中都没有重复出现的键,我不明白为什么所有的 (token, tag) 元组都只被计为 1。至少 "Ein" 应该出现不止一次。
我用一个较小版本的脚本测试过,但没能解决问题,所以决定贴出完整的代码。如果有任何建议,我将非常感激!提前谢谢。