我正在尝试在 Python 中使用一个新的包,即 MERF(Mixed Effects Random Forest,混合效应随机森林)。当我用 200000 多行数据、少量的簇(<100)来拟合模型时,模型总是抛出内存错误(MemoryError)。我认为问题出在簇的数量上:当簇的数量非常大(>10000)时,模型能够正常给出结果。
from merf import MERF
merf = MERF()

# Inclusive 'fss' bands -> cluster id. Ids are kept as floats so the resulting
# Series has the same float64 dtype the original np.append-based loop produced.
fss_bands = [
    (1570, 1875, 1.0),
    (1510, 1569, 2.0),
    (1450, 1509, 3.0),
    (1340, 1449, 4.0),
    (1001, 1339, 5.0),
]

# Work on the positional values so the assignment does not depend on the
# labels of df_train's index (the old `df_train['fss'][i]` lookup was
# label-based and broke on any non-default index).
fss = df_train['fss'].to_numpy()

# One vectorised pass instead of a Python loop with np.append, which copied the
# whole array on every iteration (quadratic in the number of rows).
# Rows outside every band (gaps between bands, out-of-range values, NaN) fall
# into cluster 0, exactly like the original `else` branch.
band_masks = [(fss >= low) & (fss <= high) for low, high, _ in fss_bands]
band_ids = [cluster_id for _, _, cluster_id in fss_bands]
clusters_train = pd.Series(
    np.select(band_masks, band_ids, default=0.0),
    index=df_train.index,  # keep the cluster labels aligned with X_train / Z_train
)

X_train = df_train[['ccs', 'pydx', 'gbr']]  # fixed-effect features
Z_train = df_train[['pct30p', 'pct90p']]  # random-effect covariates
# Rebinds y_train from the original table to its 'bad' column.
y_train = y_train['bad']

# NOTE(review): with only six clusters over 200k+ rows each cluster is huge.
# MERF appears to allocate per-cluster n_i x n_i matrices during fit, which
# would explain the MemoryError with few large clusters and why many small
# clusters succeed -- confirm against the MERF source before relying on this.
merf.fit(X_train, Z_train, clusters_train, y_train)