转换类时出现循环错误

时间:2016-01-05 12:51:48

标签: r data.table

我正在尝试使用data.table::fread进行dq检查。所以我写下面的代码。我从文件传递所有变量的类。但是在for循环中,所有变量都被转换为character,因此如果循环失败。

#Select a file
file <- file.choose()

#seperator
seps <- ","

library(data.table)

#read first 100 rows
data <- fread(file, sep= seps, header = TRUE, na.strings = c("NA",".",""," "), nrows = 100) 

#get class of all the variables
classes <- sapply(data, class)

col_names <- colnames(data)

reference <- cbind(col_names, classes)

write.csv(reference,"reference.csv", , row.names = FALSE)

classes_new <- read.csv("reference.csv")

classes_final <- as.vector(classes_new[,2])


cols <- ncol(data)


num_qc <- data.frame(non_miss = numeric(0), miss= numeric(0), miss_percent= numeric(0), mean_val= numeric(0), min_val= numeric(0), quant_5= numeric(0), quant_99= numeric(0), max_val= numeric(0))


#for loop to check class of variable and then run the if loop
for (i in 1:40){

classes <- paste0("classes_final","[",i,"]")

sel = i

data_new <- fread(file, header = TRUE, na.strings = c("NA",".",""," "), colClasses = classes, select = sel)

num.data <- data_new[[1]]

coln <- colnames(data_new)

if(class(paste0("data_new","$",coln))== "numeric"){

#if (class(data_new[[1]]) == "numeric") 

#column name
#name <- colnames(data_new)

# non-missing values        
n.non.miss <- sum(!is.na(num.data))

# missing values
n.miss <- sum(is.na(num.data))

# missing percentage
n.miss.percent <- 100*(n.miss/nrow(data_new))

# mean value
n.mean <- mean(num.data, na.rm= TRUE)

# minimum value
n.min <- min(num.data, na.rm= TRUE)

# maximum value
n.max <- max(num.data, na.rm= TRUE)

# quantiles
n.quant5 <- quantile(num.data, probs= 0.50, na.rm= TRUE)

# quantiles
n.quant99 <- quantile(num.data, probs= 0.99, na.rm= TRUE)

# combine all results in data frame
n.output <- cbind(n.non.miss, n.miss, n.miss.percent, n.mean, n.min, n.quant5, n.quant99,n.max )}}

0 个答案:

没有答案