计算火车的准确性和测试数据集

时间:2020-04-23 05:23:10

标签: r gaussian

在计算列车数据预测和测试数据预测时,我很困惑,因为每个特征的值都被计算为相同,并且测试精度不太准确

library(dbplyr)
library(tidyverse)
library(caret)

用于测试和训练准确性的占位符

    train_Data_prediction=rep(1,100)
    test_Data_prediction=rep(1,100)

执行100次,然后平均精度

    for(count in c(1:100))
    {
      data_train <- read.csv("parktraining.csv",FALSE,",")
      data_train = as.matrix(data_train)
      x_index=ncol(data_train)

      data_Without_lable <- data_train[,-x_index]
      lable <- data_train[,x_index]

      Train_mean = apply(data_Without_lable,2,mean)
      Train_sd = apply(data_Without_lable,2,sd)

      Train_offsets <- t(t(data_Without_lable) - Train_mean) 
      Train_scaled_data  <- t(t(Train_offsets) / Train_sd)

      positive_ids = which(data_train[,x_index] == 1)
      negative_ids = which(data_train[,x_index] == 0)

      positive_data = Train_scaled_data[positive_ids,]
      negative_data = Train_scaled_data[negative_ids,]

      pos_Mean = apply(positive_data,2,mean)
      positive_sd=apply(positive_data,2,sd)

      neg_Mean = apply(negative_data,2,mean)
      negative_sd=apply(negative_data,2,sd)

      tested_data <- read.csv("parktesting.csv",FALSE,",")
      tested_data = as.matrix(tested_data)

      testing_data = tested_data[,-x_index]

      predict=function(testing_data_row){
      target=0;

使用dnorm()函数进行正态分布并计算概率

        p_pos=sum(log(dnorm(testing_data_row,pos_Mean,positive_sd)))
          +log(length(positive_ids)/length(lable))
    p_neg=sum(log(dnorm(testing_data_row,neg_Mean,negative_sd)))+log( 1 - 
          (length(negative_ids)/length(lable)))

    if(p_pos>p_neg){
      target=1
    }else{
      target=0
    }  
}

  test_mean = apply(testing_data,2,mean)
  test_sd = apply(testing_data,2,sd)

  test_offset <- t(t(testing_data) - test_mean)
  test_scaled_data <- t(t(test_offset) / test_sd)

  test_prediction <- apply(test_scaled_data,1,predict)
  target=tested_data[,x_index]
  target

  test_Data_prediction[count]=length(which((test_prediction == target)==TRUE))/length(target)
  test_Data_prediction

  #Predict for train data ( optional, output not printed )
  train_prediction =apply(Train_scaled_data,1,predict)

  train_Data_prediction[count]=length(which((train_prediction == lable)==TRUE))/length(lable)
 }

test_Data_prediction
train_Data_prediction


print(paste("Average Train Data Accuracy:",mean(train_Data_prediction)*100.0,sep = " "))
print(paste("Average Test Data Accuracy:",mean(test_Data_prediction)*100.0,sep = " "))

0 个答案:

没有答案