我有一个简单的脚本,用于将 CSV 数据文件转换为 svm_light 工具可以接受的格式。代码如下:
import csv
import sys
import numpy as np
from sklearn.cross_validation import train_test_split
def svm_light_conversion(row):
    """Format one data row as an svm_light line (without the newline).

    The last element of *row* is used as the target label; every preceding
    element becomes an ``index:value`` pair, with indices starting at 1.

    Args:
        row: A non-empty sequence whose last item is the label and whose
            other items are the feature values.

    Returns:
        A string such as ``'1 1:0.5 2:1.2 '``. Every field, including the
        last one, is followed by a single space.

    Raises:
        IndexError: If *row* is empty.
    """
    # str() lets the label be a number as well as a string from csv.reader.
    fields = [str(row[-1])]
    fields.extend(
        '%d:%s' % (index, value) for index, value in enumerate(row[:-1], 1)
    )
    # The trailing space is kept so the output matches the previous format.
    return ' '.join(fields) + ' '
def reaData(inputfile):
    """Load a CSV file and return every row except the last one.

    Args:
        inputfile: Path of the CSV file to read.

    Returns:
        A list of rows, each row being the list of string fields produced
        by ``csv.reader``. The final row of the file is discarded.
    """
    with open(inputfile, 'r') as csv_handle:
        rows = list(csv.reader(csv_handle))
    # NOTE(review): the last row is always dropped -- presumably to skip a
    # trailing incomplete record; confirm this is intended.
    return rows[:-1]
def _write_svm_light_file(path, rows):
    """Write each row of *rows* to *path* as one svm_light-formatted line."""
    # Binary mode is kept from the original: it writes '\n' untranslated.
    # NOTE(review): writing str to a 'wb' file relies on Python 2 semantics;
    # under Python 3 this would need a text-mode file instead.
    with open(path, 'wb') as txtfile:
        for row in rows:
            txtfile.write(svm_light_conversion(row) + '\n')
    # No explicit close(): the with-statement closes the file on exit.


def converToSVMLiteFormat(outputfile, train, test):
    """Write the training and test rows to two svm_light data files.

    Args:
        outputfile: Path prefix; ``'_train.dat'`` and ``'_test.dat'`` are
            appended to it to build the two output file names.
        train: Iterable of rows written to the training file.
        test: Iterable of rows written to the test file.

    Each row is converted with ``svm_light_conversion`` and written on its
    own line. Existing files with the same names are overwritten.
    """
    # svm_light conversion for training data
    _write_svm_light_file(outputfile + '_train.dat', train)
    # svm_light conversion for test data
    _write_svm_light_file(outputfile + '_test.dat', test)
def main():
    """Convert a CSV file into svm_light training and test files.

    Command-line arguments:
        sys.argv[1]: Path of the input CSV file.
        sys.argv[2]: Path prefix used for the two output files.
    """
    source_csv, output_prefix = sys.argv[1], sys.argv[2]
    rows = reaData(source_csv)
    # Split the data, requesting a training share of 0.8.
    train_rows, test_rows = train_test_split(rows, train_size=0.8)
    converToSVMLiteFormat(output_prefix, train_rows, test_rows)


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
之前它一直运行良好,但现在却突然报错:
(env)fieldsofgold@fieldsofgold-VirtualBox:~/new$ python prac.py data.csv outt
Traceback (most recent call last):
File "prac.py", line 4, in <module>
from sklearn.cross_validation import train_test_split
File "/home/fieldsofgold/new/env/local/lib/python2.7/site-packages/sklearn/cross_validation.py", line 32, in <module>
from .metrics.scorer import check_scoring
File "/home/fieldsofgold/new/env/local/lib/python2.7/site-packages/sklearn/metrics/__init__.py", line 7, in <module>
from .ranking import auc
File "/home/fieldsofgold/new/env/local/lib/python2.7/site-packages/sklearn/metrics/ranking.py", line 30, in <module>
from ..utils.stats import rankdata
File "/home/fieldsofgold/new/env/local/lib/python2.7/site-packages/sklearn/utils/stats.py", line 2, in <module>
from scipy.stats import rankdata as _sp_rankdata
File "/home/fieldsofgold/new/env/local/lib/python2.7/site-packages/scipy/stats/__init__.py", line 338, in <module>
from .stats import *
File "/home/fieldsofgold/new/env/local/lib/python2.7/site-packages/scipy/stats/stats.py", line 189, in <module>
from . import distributions
File "/home/fieldsofgold/new/env/local/lib/python2.7/site-packages/scipy/stats/distributions.py", line 10, in <module>
from ._distn_infrastructure import (entropy, rv_discrete, rv_continuous,
File "/home/fieldsofgold/new/env/local/lib/python2.7/site-packages/scipy/stats/_distn_infrastructure.py", line 44, in <module>
from new import instancemethod
File "/home/fieldsofgold/new/new.py", line 10, in <module>
response2 = urllib2.urlopen(row[12])
File "/usr/lib/python2.7/urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 396, in open
protocol = req.get_type()
File "/usr/lib/python2.7/urllib2.py", line 258, in get_type
raise ValueError, "unknown url type: %s" % self.__original
ValueError: unknown url type: 0.0
有人可以帮我分析一下这个错误吗?错误似乎发生在 sklearn 内部的某处,但我完全不明白是哪里出了问题。谢谢。