Numpy Scalar Array错误

时间:2018-01-19 18:15:46

标签: python arrays numpy

我正在研究这个数据集: https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(original)

我最初编写了这段代码:

std::vector

这对我来说绝对没问题。

为了方便起见,我接着使用了pandas,这是我修改过的代码:

 import bso as opt
 from sklearn import svm
 import numpy as np

 with open("breastcancer/train_data.txt") as f:
    tr_d=np.array([[float(d) for d  in data.split(',')] for data in 
    f.read().splitlines()])
 with open("breastcancer/test_data.txt") as f:
    te_d=np.array([[float(d) for d  in data.split(',')] for data in 
    f.read().splitlines()])

 with open("breastcancer/train_data_label.txt") as f:
    tr_l=np.array([int(data) for data in f.read().splitlines()])

 with open("breastcancer/test_data_label.txt") as f:
    te_l=np.array([int(data) for data in f.read().splitlines()])


  def check(gen,tr_d,tr_l,te_d,te_l):
    mask=np.array(gen) > 0
    al_data=np.array([al[mask] for al in tr_d])
    al_test_data=np.array([al[mask] for al in te_d])
    res=svm.LinearSVC().fit(al_data,tr_l).predict(al_test_data)
    score=np.count_nonzero(te_l==res)/len(te_l)
    return score

  gen1=[1]*9
  print("all_feature:\n\t{0}   {1}  
{2}".format("".join(map(str,gen1)),check(gen1,tr_d,tr_l,te_d,te_l),len(gen1)))

class Evaluate:
    def __init__(self):
    self.train_l=tr_l
    self.train_d=tr_d
    self.test_l=te_l
    self.test_d=te_d
    self.dim=len(tr_d[0])
def evaluate(self,gen):
    mask=np.array(gen) > 0
    print (mask)
    al_data=np.array([al[mask] for al in self.train_d])
    al_test_data=np.array([al[mask] for al in self.test_d])
    res=svm.LinearSVC().fit(al_data,self.train_l).predict(al_test_data)
    score=np.count_nonzero(self.test_l==res)/len(self.test_l)
    return score
def check_dimentions(self,dim):
    if dim==None:
        return len(self.train_d[0])
    else:
        return dim

我现在收到此错误:

import bso as opt
from sklearn import svm
import numpy as np
import sys
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

try:

    data_df = pd.read_csv("breast-cancer-wisconsin.csv")

except Exception as e:
    print(e)
   sys.exit(1)
data_df.columns = ['id', 'f0', 'f1','f2','f3','f4','f5','f6','f7','f8','label']
data_df.drop(['id'],axis=1,inplace=True)
data_df['label'] = [0 if x == 2 else 1 for x in data_df['label']]
X = data_df.drop(['label'],axis=1)
y = data_df.label

tr_d, te_d, tr_l, te_l = train_test_split(X,y,test_size=0.3,random_state=42)





def check(gen,tr_d,tr_l,te_d,te_l):
    mask=np.array(gen) > 0
    al_data=np.array([al[mask] for al in tr_d])
    al_test_data=np.array([al[mask] for al in te_d])
    rfc= RandomForestClassifier(n_estimators=10)
    rfc.fit(tr_d,tr_l)
    score = rfc.score(te_d,te_l)
    return score

gen1=[1]*9
print("all_feature:\n\t{0}   {1}  {2}".format("".join(map(str,gen1)),check(gen1,tr_d,tr_l,te_d,te_l),len(gen1)))


class Evaluate:
   def __init__(self):
       self.train_l=tr_l
       self.train_d=tr_d
       self.test_l=te_l
       self.test_d=te_d
       self.dim=9
   def evaluate(self,gen):
        mask=np.array(gen) > 0
        al_data=np.array([al[mask] for al in self.train_d])
        al_test_data=np.array([al[mask] for al in self.test_d])
        rfc= RandomForestClassifier(n_estimators=10)
        res=rfc.fit(tr_d,tr_l)
        score = rfc.score(te_d,te_l)
    return score 
    def check_dimentions(self,dim):
        if dim==None:
            return 9
        else:
            return dim

我在第31行得到了这个,有关如何解决这个问题的任何帮助,所以我可以在后面的代码中使用这个功能会很棒。

0 个答案:

没有答案