我正在尝试对自己的数据运行Xgboost算法。为此,我首先尝试使用以下波士顿房价预测模型-
import treelite
import xgboost
from sklearn.datasets import load_boston
import treelite.runtime # runtime module
X, y = load_boston(return_X_y=True)
print('dimensions of X = {}'.format(X.shape))
print('dimensions of y = {}'.format(y.shape))
dtrain = xgboost.DMatrix(X, label=y)
params = {'max_depth':3, 'eta':1, 'silent':1, 'objective':'reg:linear',
'eval_metric':'rmse'}
bst = xgboost.train(params, dtrain, 20, [(dtrain, 'train')])
bst.save_model('bst1.model')
bst = xgboost.Booster({'nthread':4}) #init model
bst.load_model("bst1.model") # load data
model = treelite.Model.from_xgboost(bst)
toolchain = 'gcc'
model.export_lib(toolchain=toolchain, libpath='./mymodel.dylib', verbose=True)
model.export_lib(toolchain=toolchain, libpath='./mymodel.dylib',params={'parallel_comp': 32}, verbose=True)
predictor = treelite.runtime.Predictor('./mymodel.dylib', verbose=True)
batch = treelite.runtime.Batch.from_npy2d(X, rbegin=10, rend=20)
out_pred = predictor.predict(batch)
print(out_pred)
这很好。现在,我使用相同的概念来训练和预测自己的数据
def predict(data):
features_noise = np.zeros((5, ))
snr, rr_num, var, fr, fr2 = find_noise_features(data)
features_noise[0] = snr
features_noise[1] = rr_num
features_noise[2] = var
features_noise[3] = fr
features_noise[4] = fr2
features = extract_basic_features(data, 30000)
features = np.hstack((features, features_noise.reshape(1, -1)))
bst = xgb.Booster({'nthread': 4})
bst.load_model("xgb_model.bin")
dfeatures = xgb.DMatrix(features)
prediction = bst.predict(dfeatures,ntree_limit=420)
prediction = prediction.astype('int8')
result = data_preprocess.encoder.inverse_transform(prediction)
# print(prediction)
# print(result)
model = treelite.Model.from_xgboost(bst)
toolchain = 'gcc'
#model.export_lib(toolchain=toolchain, libpath='./afibmodel.dylib', verbose=True)
model.export_lib(toolchain=toolchain, libpath='./afibmodel.dylib',params={'parallel_comp': 32}, verbose=True)
predictor = treelite.runtime.Predictor('./afibmodel.dylib', verbose=True)
# batch = treelite.runtime.Batch.from_csr(dfeatures)
batch = treelite.runtime.Batch.from_npy2d(dfeatures) #I suspected I am doing something wrong here.
out_pred = predictor.predict(batch)
print(out_pred)
data = genfromtxt('AFIB.csv', delimiter=',')
predict(data)
print("Time taken in feature extraction : ", time.time() - start_time)
我收到此错误-
batch = treelite.runtime.Batch.from_npy2d(dfeatures)
*** ValueError: mat must be of type numpy.ndarray
我在这里做错了,但是毫无头绪。任何建议都将受到高度赞赏。
这是我关注的帮助教程-https://treelite.readthedocs.io/en/latest/tutorials/first.html