将xgboost dmatrix转换为numy数组

时间:2019-09-05 14:43:22

标签: python numpy xgboost

我正在尝试对自己的数据运行Xgboost算法。为此,我首先尝试使用以下波士顿房价预测模型-

import treelite
import xgboost
from sklearn.datasets import load_boston
import treelite.runtime     # runtime module

X, y = load_boston(return_X_y=True)
print('dimensions of X = {}'.format(X.shape))
print('dimensions of y = {}'.format(y.shape))

dtrain = xgboost.DMatrix(X, label=y)
params = {'max_depth':3, 'eta':1, 'silent':1, 'objective':'reg:linear',
          'eval_metric':'rmse'}
bst = xgboost.train(params, dtrain, 20, [(dtrain, 'train')])

bst.save_model('bst1.model')

bst = xgboost.Booster({'nthread':4}) #init model
bst.load_model("bst1.model") # load data

model = treelite.Model.from_xgboost(bst)

toolchain = 'gcc'

model.export_lib(toolchain=toolchain, libpath='./mymodel.dylib', verbose=True)

model.export_lib(toolchain=toolchain, libpath='./mymodel.dylib',params={'parallel_comp': 32}, verbose=True)


predictor = treelite.runtime.Predictor('./mymodel.dylib', verbose=True)

batch = treelite.runtime.Batch.from_npy2d(X, rbegin=10, rend=20)

out_pred = predictor.predict(batch)
print(out_pred)

这很好。现在,我使用相同的概念来训练和预测自己的数据

def predict(data):

    features_noise = np.zeros((5, ))

    snr, rr_num, var, fr, fr2 = find_noise_features(data)
    features_noise[0] = snr
    features_noise[1] = rr_num
    features_noise[2] = var
    features_noise[3] = fr
    features_noise[4] = fr2
    features = extract_basic_features(data, 30000)
    features = np.hstack((features, features_noise.reshape(1, -1)))

    bst = xgb.Booster({'nthread': 4})
    bst.load_model("xgb_model.bin")
    dfeatures = xgb.DMatrix(features)
    prediction = bst.predict(dfeatures,ntree_limit=420)
    prediction = prediction.astype('int8')
    result = data_preprocess.encoder.inverse_transform(prediction)
    # print(prediction)
    # print(result)
    model = treelite.Model.from_xgboost(bst)
    toolchain = 'gcc'
    #model.export_lib(toolchain=toolchain, libpath='./afibmodel.dylib', verbose=True)
    model.export_lib(toolchain=toolchain, libpath='./afibmodel.dylib',params={'parallel_comp': 32}, verbose=True)
    predictor = treelite.runtime.Predictor('./afibmodel.dylib', verbose=True)


    # batch = treelite.runtime.Batch.from_csr(dfeatures)
    batch = treelite.runtime.Batch.from_npy2d(dfeatures) #I suspected I am doing something wrong here. 

    out_pred = predictor.predict(batch)
    print(out_pred)

data = genfromtxt('AFIB.csv', delimiter=',')
predict(data)
print("Time taken in feature extraction : ", time.time() - start_time)

我收到此错误-

batch = treelite.runtime.Batch.from_npy2d(dfeatures)
*** ValueError: mat must be of type numpy.ndarray

我在这里做错了,但是毫无头绪。任何建议都将受到高度赞赏。

这是我关注的帮助教程-https://treelite.readthedocs.io/en/latest/tutorials/first.html

0 个答案:

没有答案