如何从CrossTable中将错误百分比提取到变量中?

时间:2015-09-18 11:45:46

标签: r knn

以下代码:

# Load the student performance data (semicolon-delimited, with a header row).
portuguese_scores <- read.table(
  "https://raw.githubusercontent.com/JimGorman17/Datasets/master/student-por.csv",
  sep = ";",
  header = TRUE
)

# Drop the columns that are not used as predictors.
portuguese_scores <- portuguese_scores[
  , !names(portuguese_scores) %in% c("school", "age", "G1", "G2")
]

# Binary target: TRUE when the final grade G3 is at or above the median.
# median() yields a plain number instead of a named element of a summary
# table; na.rm = TRUE keeps summary()'s behaviour of ignoring missing grades.
median_score <- median(portuguese_scores$G3, na.rm = TRUE)
portuguese_scores$score_gte_than_median <-
  as.factor(median_score <= portuguese_scores$G3)

# G3 is now encoded in the target, so remove it from the predictors.
portuguese_scores <- portuguese_scores[, names(portuguese_scores) != "G3"]

#' Rescale a numeric vector to the [0, 1] range (min-max normalization).
#'
#' @param x A numeric vector.
#' @return A numeric vector the same length as `x`. If every value of `x` is
#'   identical the result is all zeros (instead of the NaN that 0 / 0 would
#'   give). If `x` contains NA the result is all NA, because min() and max()
#'   propagate missing values.
normalize <- function(x) {
  rng <- range(x)
  span <- rng[2] - rng[1]
  # A constant column has zero width; dividing by it would produce NaN,
  # which downstream distance-based code such as knn() cannot handle.
  if (!is.na(span) && span == 0) {
    x[] <- 0
    return(x)
  }
  (x - rng[1]) / span
}

library(plyr)  # provides mapvalues(); library() stops at once if it is missing

# Levels of each categorical column, listed in the order they are recoded:
# the first level becomes 0, the second 1, and so on.
yes_no <- c("no", "yes")
jobs <- c("at_home", "health", "other", "services", "teacher")
category_levels <- list(
  sex = c("M", "F"),
  address = c("U", "R"),
  famsize = c("LE3", "GT3"),
  Pstatus = c("T", "A"),
  Mjob = jobs,
  Fjob = jobs,
  reason = c("course", "home", "other", "reputation"),
  guardian = c("father", "mother", "other"),
  schoolsup = yes_no,
  famsup = yes_no,
  paid = yes_no,
  activities = yes_no,
  nursery = yes_no,
  higher = yes_no,
  internet = yes_no,
  romantic = yes_no
)

for (col in names(category_levels)) {
  lv <- category_levels[[col]]
  recoded <- mapvalues(portuguese_scores[[col]], from = lv, to = seq_along(lv) - 1)
  # Go through as.character() first: if the column is a factor, as.numeric()
  # alone would return the internal level codes, not the mapped 0/1/... values.
  portuguese_scores[[col]] <- as.numeric(as.character(recoded))
}
# Min-max scale the 28 predictor columns and re-attach the label (column 29).
scaled_features <- lapply(portuguese_scores[1:28], normalize)
port_n <- data.frame(scaled_features, portuguese_scores[29])

# Reproducible 90/10 train/test split; the test set keeps predictors only.
set.seed(123)
n_obs <- nrow(port_n)
train_sample <- sample(n_obs, 0.9 * n_obs)
port_train <- port_n[train_sample, ]
port_test <- port_n[-train_sample, 1:28]

# k-NN code
library(class)    # knn()
library(gmodels)  # CrossTable()

# True labels of the held-out rows (column 29 is the target).
port_test_labels <- portuguese_scores[-train_sample, 29]
# NOTE(review): learn_DF is not used in the visible code; kept in case later
# code fills it in.
learn_DF <- data.frame()

# Train on a random i percent of the training rows.
i <- 15
pct_of_training_data <- sample(nrow(port_train), i / 100 * nrow(port_train))
port_train_pct <- port_train[pct_of_training_data, 1:28]
port_train_labels <- port_train[pct_of_training_data, 29]
port_pred <- knn(
  train = port_train_pct,
  test = port_test,
  cl = port_train_labels,
  k = 10
)

# Rows are the actual labels, columns the predicted labels.
CrossTable(port_test_labels, y = port_pred, prop.chisq = FALSE)

生成以下输出(原文此处为 CrossTable 输出结果的截图,图片未包含在本文中):

我想将橙色圈出的两个数字提取到变量中,这样我就可以计算错误百分比。

我该怎么做?

1 个答案:

答案 0 :(得分:3)

您可以将 CrossTable() 的返回值保存到一个对象中。该返回值只是一个列表,您可以按名称访问其中的元素。

# CrossTable() returns its results as a list, so keep the return value.
x <- CrossTable(port_test_labels, y = port_pred, prop.chisq = FALSE)

# prop.tbl holds each cell as a proportion of the table total; index it by
# row name (actual label) and column name (predicted label).
x$prop.tbl["TRUE", "FALSE"]
# 0.07692308

x$prop.tbl["FALSE", "TRUE"]
# 0.3076923