在计算训练数据和测试数据的预测结果时,我感到很困惑:每个特征计算出的值都相同,而且测试准确率也不理想。
library(dbplyr)
library(tidyverse)
library(caret)
# Placeholders for the test and train accuracy of each run
# Accuracy slots, one per run (100 runs). Every slot starts at 1 and is meant
# to be overwritten with the accuracy measured in the corresponding run.
test_Data_prediction <- rep(1, times = 100)
train_Data_prediction <- rep(1, times = 100)
# Run 100 times, then average the accuracy
# Gaussian naive Bayes: fit per-class feature means/sds on the standardised
# training set, then score both the test set and the training set.
#
# Both CSV files are read without a header row; the last column holds the
# 0/1 class label and every other column is a feature.
#
# The files do not change between iterations, so they are read once up front
# instead of once per pass.
data_train <- as.matrix(read.csv("parktraining.csv", header = FALSE, sep = ","))
tested_data <- as.matrix(read.csv("parktesting.csv", header = FALSE, sep = ","))

# NOTE(review): nothing inside the loop is random (no resampling or shuffling),
# so every iteration yields the same accuracy. Add a random train/test split
# here if averaging over runs is meant to be informative.
for (count in seq_along(test_Data_prediction)) {
  x_index <- ncol(data_train)
  data_Without_lable <- data_train[, -x_index, drop = FALSE]
  lable <- data_train[, x_index]

  # Standardise the training features column by column.
  Train_mean <- apply(data_Without_lable, 2, mean)
  Train_sd <- apply(data_Without_lable, 2, sd)
  Train_offsets <- sweep(data_Without_lable, 2, Train_mean, "-")
  Train_scaled_data <- sweep(Train_offsets, 2, Train_sd, "/")

  # Per-class Gaussian parameters, estimated on the standardised features.
  # drop = FALSE keeps a matrix even if a class has a single row.
  positive_ids <- which(lable == 1)
  negative_ids <- which(lable == 0)
  positive_data <- Train_scaled_data[positive_ids, , drop = FALSE]
  negative_data <- Train_scaled_data[negative_ids, , drop = FALSE]
  pos_Mean <- apply(positive_data, 2, mean)
  positive_sd <- apply(positive_data, 2, sd)
  neg_Mean <- apply(negative_data, 2, mean)
  negative_sd <- apply(negative_data, 2, sd)

  # Log class priors. The negative prior is the share of negative rows (the
  # earlier `1 - negative share` was actually the positive prior).
  log_prior_pos <- log(length(positive_ids) / length(lable))
  log_prior_neg <- log(length(negative_ids) / length(lable))

  # Classify one standardised feature row.
  #
  # testing_data_row: numeric vector with one value per feature, on the same
  #   standardised scale as Train_scaled_data.
  # Returns 1 when the positive class has the larger log posterior, else 0.
  predict <- function(testing_data_row) {
    # The prior must be on the same expression as the likelihood sum: a line
    # that starts with `+` is parsed as a separate statement and discarded.
    # log = TRUE avoids log(0) = -Inf when a density underflows.
    p_pos <- sum(dnorm(testing_data_row, pos_Mean, positive_sd, log = TRUE)) +
      log_prior_pos
    p_neg <- sum(dnorm(testing_data_row, neg_Mean, negative_sd, log = TRUE)) +
      log_prior_neg
    if (p_pos > p_neg) 1 else 0
  }

  # Standardise the test features with the TRAINING mean/sd: the class
  # parameters above live in that coordinate system, so test rows must be
  # mapped into it rather than rescaled by their own statistics.
  testing_data <- tested_data[, -x_index, drop = FALSE]
  test_offset <- sweep(testing_data, 2, Train_mean, "-")
  test_scaled_data <- sweep(test_offset, 2, Train_sd, "/")

  test_prediction <- apply(test_scaled_data, 1, predict)
  target <- tested_data[, x_index]
  test_Data_prediction[count] <-
    sum(test_prediction == target, na.rm = TRUE) / length(target)

  # Accuracy on the training data itself (optional sanity check).
  train_prediction <- apply(Train_scaled_data, 1, predict)
  train_Data_prediction[count] <-
    sum(train_prediction == lable, na.rm = TRUE) / length(lable)
}
# Show the per-run accuracy vectors, then report each mean as a percentage.
test_Data_prediction
train_Data_prediction
print(paste("Average Train Data Accuracy:", 100 * mean(train_Data_prediction)))
print(paste("Average Test Data Accuracy:", 100 * mean(test_Data_prediction)))