我想将名为 my_data2 的数据表划分为两个样本:学习样本和测试样本。如何在表的第一部分(学习样本)上拟合逻辑回归,然后对第二部分(测试样本)进行预测?谢谢。这是我的代码:
import numpy as np
from statsmodels.formula.api import logit
FNAME2 = "C:/Users/lenovo/Desktop/Nouveau dossier (2)/table.csv"
# Write the selected rows of my_data to disk, then read them back as my_data2.
# np.savetxt returns None, so FinalTableau carries no data.
FinalTableau = np.savetxt(FNAME2, my_data[index_to_use], delimiter=",")
my_data2 = np.genfromtxt(FNAME2, delimiter=',')
x = my_data2[:, 1]
a = my_data2[:, 3]
# x with values 1 and 2
print(x)
# converts my binary data series from (1, 2) to (0,1)
# Recode from my_data2 (not my_data) so x stays row-aligned with a.
x = my_data2[:, 1] - 1
print(x)
form = 'x ~ a'
# The formula interface looks variables up by name in `data`, so pass the
# columns as a dict rather than the raw, unnamed 2-D array.
affair_model = logit(form, {'x': x, 'a': a})
affair_result = affair_model.fit()
print(affair_result.summary())
# With no argument, predict() returns the fitted values for the rows used
# to fit the model.
print(affair_result.predict())
答案 0 :(得分:0)
将 my_data2 按行分成两个大小相等的数组(行数为奇数时,后一个数组多一行):
# Row index of the midpoint; floor division keeps it an integer.
N = len(my_data2) // 2
# Rows before the midpoint are used for fitting ...
learning_sample = my_data2[:N]
# ... and the remaining rows are held out for prediction.
test_sample = my_data2[N:]
例如,
import numpy as np
from statsmodels.formula.api import logit
FNAME2 = "C:/Users/lenovo/Desktop/Nouveau dossier (2)/table.csv"
# Round-trip the selected rows of my_data through a CSV file.
# np.savetxt returns None, so FinalTableau carries no data.
FinalTableau = np.savetxt(FNAME2, my_data[index_to_use], delimiter=",")
my_data2 = np.genfromtxt(FNAME2, delimiter=',')
# converts my binary data series from (1, 2) to (0,1)
my_data2[:, 1] -= 1
# print(my_data2)
# Split by row: the first half is used for fitting, the rest is held out.
N = len(my_data2) // 2
learning_sample, test_sample = my_data2[:N], my_data2[N:]
x = learning_sample[:, 1]
a = learning_sample[:, 3]
# x now holds the recoded values 0 and 1
print(x)
form = 'x ~ a'
# The formula interface looks variables up by name in `data`, so pass the
# learning-sample columns as a dict rather than the raw, unnamed 2-D array.
affair_model = logit(form, {'x': x, 'a': a})
affair_result = affair_model.fit()
print(affair_result.summary())
# With no argument, predict() returns the fitted values for the learning sample.
print(affair_result.predict())
# Apply the fitted model to the held-out rows: only the regressor `a` is
# needed, supplied under the same name the formula uses.
test_prediction = affair_result.predict({'a': test_sample[:, 3]})
print(test_prediction)