我拿到了下面这段用于转换 arff 文件的代码。为了运行它,我不得不先安装 numpy 库;现在用我自己的文件运行时,它会抛出 KeyError,例如:
"imgInfo[1][clstrDct[clstr]] += 1 # increment the cluster count  KeyError: 'cluster35\r'"
import numpy as np
def xfrm(arFil='KBcls-100-10-20'):
    '''Transform a clustered patch arff file to an image training / test file.

    Reads ``arFil + '.arff'``, in which every data row describes one patch:
    the first field is the image id, the second-to-last field the image
    class and the last field the cluster the patch was assigned to.  Writes
    ``arFil + '-img.arff'`` with one row per image: the image id, the number
    of that image's patches counted in each cluster, and the image class.

    Every field is stripped of surrounding whitespace before it is used.
    Without that, a carriage return or space left on the last field of a
    row makes the cluster lookup fail with a KeyError even though the
    cluster is declared in the header.

    Args:
        arFil: path of the input file without its ``.arff`` extension.

    Returns:
        ``'EOF bfore one'`` if no ``@attribute cluster`` line is found,
        ``'EOF bfore two'`` if no ``@data`` line follows it, otherwise
        ``None`` once the output file has been written.

    Raises:
        KeyError: a data row names a cluster that the header's cluster
            attribute does not declare.
        IndexError: a data row has fewer than two fields.
    '''
    # These names are published at module level by the global statement;
    # it is kept so code that inspects them after a call keeps working.
    global imgDct, clstrDct, num, clsts, lne
    imgDct = {}    # image id -> [class label, per-cluster patch counts]
    clstrDct = {}  # cluster name -> index, and index -> cluster name

    with open(arFil + '.arff', 'r') as ptchFil:
        # Header: find the nominal cluster attribute and read its values.
        for lne in ptchFil:
            if lne.lower().startswith('@attribute cluster'):
                names = lne[lne.find('{') + 1:lne.find('}')]
                # Strip so that "{a, b}" and "{a,b}" declare the same names.
                clsts = [name.strip() for name in names.split(',')]
                num = len(clsts)
                break
        else:
            return 'EOF bfore one'

        # Map cluster names to integers 0+ and keep the inverted mapping too.
        for idx, name in enumerate(clsts):
            clstrDct[name] = idx
            clstrDct[idx] = name

        # Skip the rest of the header up to the data marker, matched
        # case-insensitively like the attribute line above.
        for lne in ptchFil:
            if lne.lower().startswith('@data'):
                break
        else:
            return 'EOF bfore two'

        # Data: count, per image, how many patches fall into each cluster.
        for lne in ptchFil:
            row = lne.strip()
            if not row or row.startswith('%'):
                continue  # blank line or arff comment, not a patch
            attrs = [field.strip() for field in row.split(',')]
            imgId, cls, clstr = attrs[0], attrs[-2], attrs[-1]
            imgInfo = imgDct.get(imgId)
            if imgInfo is None:
                # First patch of this image: new cluster counting array.
                imgInfo = [cls, np.zeros(num, dtype=int)]
                imgDct[imgId] = imgInfo
            imgInfo[1][clstrDct[clstr]] += 1

    with open(arFil + '-img.arff', 'w') as arFile:
        arFile.write('% from {0:}.arff: {1:} patch clusters\n%\n'.format(arFil, num))
        arFile.write('@relation Image-Patch-Clusters\n@attribute Image-ID numeric\n')
        for name in clsts:
            arFile.write('@attribute {} numeric\n'.format(name))
        arFile.write('@attribute class {unknown, street, highway}\n@data')
        # Each row starts with its own newline, so the file ends without one.
        for imid, iminfo in imgDct.items():
            arFile.write('\n{}, '.format(imid))
            for count in iminfo[1]:
                arFile.write('{}, '.format(count))
            arFile.write('{}'.format(iminfo[0]))
if __name__ == "__main__":
    # Convert Test1Clust.arff when run as a script. xfrm returns a status
    # string when the input is malformed and None on success; SystemExit
    # prints a string status to stderr and exits non-zero, and exits
    # cleanly on None, so a failed conversion is no longer silent.
    raise SystemExit(xfrm('Test1Clust'))
答案 0 :(得分:1)
`readline` 返回的每一行都连同行尾的换行符一起返回。这意味着每个 `attrs[-1]` 的末尾都会多出一个 `\r`、`\n` 或 `\r\n`。这就是 `"cluster35\r"` 中带有 `\r` 的原因。您可以使用 `strip` 删除它:
clstr = attrs[-1].strip()