Python决策树分类器

时间:2018-05-09 20:33:51

标签: python numpy scikit-learn

尝试使用scikit包构建树分类器,但是我在获取分类器输入的正确格式时遇到了问题。

import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

#import dataset
data = pd.read_table('Data/Breast.csv')
data.head(10)

enter image description here

  X=data[['clump_thickness','shape_uniformity','marginal_adhesion','epithelial_size','bare_nucleoli','bland_chromatin','normal_nucleoli','mitoses']]

X_train = X.values

Y = data[['class']]
Y_train = Y.values

model = DecisionTreeClassifier()
model 

model.fit(X_train,Y_train)

但是我收到以下错误消息:

  

ValueError Traceback(最近一次调用   最后)in()   ----> 1 model.fit(X_train,Y_train)

     

C:\用户\托比亚斯\应用程序数据\本地\程序\ python的\ python36 \ LIB \站点包\ sklearn \树\ tree.py   in fit(self,X,y,sample_weight,check_input,X_idx_sorted)       788 sample_weight = sample_weight,       789 check_input = check_input,    - > 790 X_idx_sorted = X_idx_sorted)       791回归自我       792

     

C:\用户\托比亚斯\应用程序数据\本地\程序\ python的\ python36 \ LIB \站点包\ sklearn \树\ tree.py   in fit(self,X,y,sample_weight,check_input,X_idx_sorted)       114 random_state = check_random_state(self.random_state)       115如果check_input:    - > 116 X = check_array(X,dtype = DTYPE,accept_sparse =“csc”)       117 y = check_array(y,ensure_2d = False,dtype = None)       118如果issparse(X):

     

C:\用户\托比亚斯\应用程序数据\本地\程序\ python的\ python36 \ LIB \站点包\ sklearn \ utils的\ validation.py   在check_array(array,accept_sparse,dtype,order,copy,   force_all_finite,ensure_2d,allow_nd,ensure_min_samples,   ensure_min_features,warn_on_dtype,estimator)       431 force_all_finite)       432否则:    - > 433 array = np.array(array,dtype = dtype,order = order,copy = copy)       434       435 if ensure_2d:

     

ValueError:无法将字符串转换为float:'?'

我做错了什么? 我可以看到X.values是dType = Object ...

由于

1 个答案:

答案 0 :(得分:0)

尝试执行此操作以确保您传递整数,如果您的集合包含字符串或分类值,或者显示另一个问题,我将使用解决方案编辑此答案:

requests= require("requests")
openssl = require("openssl")

nonce = tostring(os.time())
customer_id = 'some id'
api_key = 'some string'
API_SECRET = 'some other string'

message = nonce..customer_id..api_key
signature = openssl.hmac.digest('sha256', message, API_SECRET)
signature = string.upper(signature)

params = {['key'] = api_key, ['signature'] = signature, ['nonce'] = nonce}

params_str = {}
for k, v in pairs(params) do 
    table.insert(params_str, k..'='..v)
end
params = table.concat(params_str, '&')

-- params = "key=some string&nonce=1565817138&signature=7DE82CB42D1B8F38F7AC1A74EC3A5E06F1AA3A20A14F3DD0E7040F088FBF9F69"
print('params> '..params)

url = "http://httpbin.org/post"
-- url = "https://www.bitstamp.net/api/v2/balance/"

headers = {["Content-Type"] = "application/x-www-form-urlencoded"}

r = requests.post{url, data = params, headers = headers}
print(r.text)
print(r.status_code)