我正在使用 RandomForestClassifier,如下所述,对二元分类使用交叉验证:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score
# Class-weighted random forest, scored with a shuffled, stratified 10-fold split.
clf = RandomForestClassifier(class_weight="balanced", random_state=42)
k_fold = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)

# Mean accuracy across the folds, reported as a percentage.
accuracy = cross_val_score(clf, X, y, cv=k_fold, scoring='accuracy')
accuracy_pct = round(100 * accuracy.mean(), 2)
print("Accuracy: " + str(accuracy_pct) + "%")

# Mean weighted F1 across the same folds, reported as a percentage.
f1 = cross_val_score(clf, X, y, cv=k_fold, scoring='f1_weighted')
f1_pct = round(100 * f1.mean(), 2)
print("F Measure: " + str(f1_pct) + "%")
(类标签为 0 和 1)。
现在,我想在交叉验证(cross validation)下得到类 1 的预测概率:
pred = clf.predict_proba(X)[:,1]
# First attempt: print the class-1 probabilities assigned to `pred` just above.
# NOTE(review): no clf.fit(...) call appears in the code shown, which matches
# the NotFittedError quoted later in the question.
print(pred)
# Second attempt: for every row, the column indices of the (at most) six
# largest probabilities, ordered from smaller to larger probability.
probs = clf.predict_proba(X)
best_n = np.argsort(probs, axis=1)[:,-6:]
我想按这些结果的预测概率对数据进行排序。为此,我尝试了以上两种方法。
/* Size in bytes of each text field of an album record (name, release, length, artist). */
#define MES_LEN 100
/* Size in bytes of the record's index field. */
/* NOTE(review): a one-letter lowercase macro replaces every identifier `m` in the file; consider a longer name. */
#define m 23
/* One album record; every field is stored as a NUL-terminated string. */
typedef struct {
char index[m];
char name[MES_LEN];
char release[MES_LEN];
char length[MES_LEN];
char artist[MES_LEN];
} INFORM;
/* Singly linked list node that carries one album record. */
typedef struct list_elem {
INFORM inform;
struct list_elem *next;
}
LEL;
/* Head of the global album list; file-scope storage, so it starts out NULL (empty list). */
LEL* list;
void AddElement(void); // Write a new element to the input file (DATAinput.txt)
void LoadData(void); // Read the existing records from DATA.txt
void ReadElement (void); // Read back the records previously written to DATAinput.txt
void OutpuAll(void); // Print to the console every record reachable from `list`
int menu(void);
// Program entry point. The surrounding menu/switch code is elided ("...") in
// this excerpt; only the three dispatch cases are shown.
int main() {
...
case 1: OutpuAll(); break; // Output all data
case 2: AddElement(); ReadElement(); break; // Add an element to the file, then read the file back
case 3: LoadData(); break; // Load data from the file
...
}
/*
 * Prompt the user for the fields of a new album and write them to
 * DATAinput.txt. The prompts for the fields other than the artist are
 * elided ("...") in this excerpt.
 */
void AddElement (void)
{
    char albN[25], albR[11], albL[11], albA[25], albI[5];
    FILE *fileD;

    /*
     * The stream is written with fprintf, so it must be opened in a write
     * mode; with "r" every write fails silently. Append mode keeps the
     * records that are already stored in the file.
     */
    fileD = fopen("DATAinput.txt", "a");
    if (fileD == NULL) {
        printf("Can not open the file\n");
        return;
    }
...
    printf("Enter album artist \n");
    /* Width 24 leaves room for the terminating NUL in albA[25]. */
    if (scanf("%24s", albA) == 1)
        fprintf(fileD, "%s ", albA);
    fclose(fileD);
}
void ReadElement (void) // Reading previously written data(AddElement)
{
LEL *ptr;
ptr = (LEL*)malloc(sizeof(LEL));
FILE * fileD;
fileD = fopen("DATAinput.txt", "r");
if (fileD == NULL) {
printf("Can not open the file\n");
exit(1);
}
while(fscanf (fileD, "%s %s %s %s %s", ptr->inform.index, ptr->inform.name, ptr->inform.release, ptr->inform.length, ptr->inform.artist) != EOF)
{ ptr->next=NULL;
printf("\n Id:%s Album: %s Released: %s Length: %s Artist: %s", ptr->inform.index, ptr->inform.name, ptr->inform.release, ptr->inform.length, ptr->inform.artist); }
fclose(fileD);
free(ptr);
}
void LoadData(void)
// Reading existing data from file
{
LEL *current, *head;
LEL *ptr;
ptr = (LEL*)malloc(sizeof(LEL));
FILE * file;
file = fopen("DATA.txt", "r");
if (file == NULL) {
printf("Can not open the file\n");
}
while(fscanf (file, "%s %s %s %s %s", ptr->inform.index, ptr->inform.name, ptr->inform.release, ptr->inform.length, ptr->inform.artist) != EOF)
{ printf("\n %s Album: %s Released: %s Length: %s Artist: %s", ptr->inform.index, ptr->inform.name, ptr->inform.release, ptr->inform.length, ptr->inform.artist); }
fclose(file);
ptr->next = NULL;
}
void OutpuAll(void) // Trying to output data that i previously loaded from files to structure.
{
LEL *ptr;
LEL* ptr = list;
while (ptr!=NULL) {
printf("\n Id:%s Album: %s Released: %s Length: %s Artist: %s", ptr->inform.index, ptr->inform.name, ptr->inform.release, ptr->inform.length, ptr->inform.artist);}
}
ptr = ptr->next;
}
我收到以下错误
NotFittedError:此 RandomForestClassifier 实例尚未拟合(not fitted yet)。使用此方法之前,请使用适当的参数调用 'fit'。
对于这两种情况。
我只是想知道我在哪里弄错了。
很高兴在需要时提供更多详细信息。
答案 0 :(得分:2)
我使用以下代码解决了我的问题:
# Both names are used below but were never imported in the snippet.
import numpy as np
from sklearn.model_selection import cross_val_predict

# Class probabilities obtained through cross-validation with the same k_fold
# splitter; `clf`, `X`, `y` and `k_fold` come from the question's code.
proba = cross_val_predict(clf, X, y, cv=k_fold, method='predict_proba')
# Second column: probability of the class at index 1.
print(proba[:,1])
# Sample indices ordered by ascending class-1 probability.
print(np.argsort(proba[:,1]))
答案 1 :(得分:1)
看看documentation,它指定了概率是根据树的平均结果来计算的。
在您的情况下,您首先需要调用 fit() 方法,以便在模型中生成树。用训练数据拟合模型之后,您就可以调用 predict_proba() 方法。
错误信息中也指出了这一点。
# Fit the model first; the "(...)" stands for the constructor arguments.
model = RandomForestClassifier(...)
model.fit(X_train, Y_train)
# Probability: second column of predict_proba (the class at index 1)
model.predict_proba(X)[:,1]
答案 2 :(得分:1)
如果您想将交叉验证(CV)得到的模型用于未见过的数据点,请使用以下方法。
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate
# Load the iris data set and split it into features and targets.
iris = datasets.load_iris()
X, y = iris.data, iris.target

clf = RandomForestClassifier(
    n_estimators=10,
    random_state=42,
    class_weight="balanced",
)

# return_estimator=True keeps the estimator fitted on each of the 3 splits.
cv_results = cross_validate(clf, X, y, cv=3, return_estimator=True)
fold_estimators = cv_results['estimator']
clf_fold_0 = fold_estimators[0]

# Query the first fold's fitted estimator with a single sample (row 133).
sample = iris.data[133]
clf_fold_0.predict_proba([sample])
# array([[0. , 0.5, 0.5]])