从单独的data.frames计算不同列的摘要统计信息

时间:2016-05-01 14:02:42

标签: r dataframe feature-extraction

我是R的新手。我的目录包含许多具有相同结构的数据框文件(3列用","分隔),如下所示: 日期和时间戳,V2,V3

我应该计算所有文件的V2和V3列的均值,中值,SD,方差和偏度,然后将这10列保存在单独的文件中。每列(例如,V2中位数)将包含所有文件中所有V2列的中位数。我需要你的帮助。

谢谢

2 个答案:

答案 0 :(得分:1)

如果我对你的问题理解正确,下面是一个使用data.table的快速解决方案。

# Load library
  library(data.table)
  library(moments)
  library(readr)



# Get the list of `.csv` files in your folder.
# `pattern` is a regular expression (not a shell glob), so the extension is
# matched with an escaped dot anchored at the end of the file name.
filenames <- list.files(
  "C:/your/folder",
  pattern = "\\.csv$",
  full.names = TRUE
)

# Fail early with a clear message instead of an obscure "V2 not found" error
# further down when the folder holds no matching files.
if (length(filenames) == 0) {
  stop("No .csv files found in the folder", call. = FALSE)
}

# Load every file with fread() and bind them into one single data.table
df <- rbindlist(lapply(filenames, fread))

# Run the summary statistics for V2 and V3 over the combined rows.
# The result is a one-row data.table with ten columns.
# NOTE(review): assumes the value columns are named V2 and V3 after reading
# (fread's default names for header-less files) -- confirm against your data.
output <- df[, .(
  V2_mean = mean(V2),
  V2_median = median(V2),
  V2_SD = sd(V2),
  V2_var = var(V2),
  V2_skw = skewness(V2),
  V3_mean = mean(V3),
  V3_median = median(V3),
  V3_SD = sd(V3),
  V3_var = var(V3),
  V3_skw = skewness(V3)
)]

# Save the output in a different file
write_csv(output, "output.csv")

答案 1 :(得分:0)

还有一种方法:

#This library is needed to compute skewness
library(e1071) 

# Set the directory where your files are present as the working directory
setwd("Directory where your files are present")

# Collect the CSV file names (extension matched case-insensitively).
# The dot is escaped so only a real ".csv" extension matches.
files <- list.files(path = ".", pattern = "\\.csv$", ignore.case = TRUE)

# The result file is written into this same directory; skip it so that a
# re-run does not summarise its own previous output.
files <- files[tolower(files) != "file_stats.csv"]

if (length(files) == 0) {
  stop("No CSV files found in the working directory", call. = FALSE)
}

# Compute mean, median, SD, variance and skewness of one column,
# ignoring missing values. skewness() comes from the e1071 package.
column_stats <- function(x) {
  c(
    mean(x, na.rm = TRUE),
    median(x, na.rm = TRUE),
    sd(x, na.rm = TRUE),
    var(x, na.rm = TRUE),
    skewness(x, na.rm = TRUE)
  )
}

# Build one row of ten statistics per input file (columns 2 and 3),
# then bind all rows at once instead of growing a data frame in a loop.
stats_per_file <- lapply(files, function(path) {
  current_file <- read.table(path, header = TRUE, sep = ",")
  c(column_stats(current_file[[2]]), column_stats(current_file[[3]]))
})

file <- as.data.frame(do.call(rbind, stats_per_file))
names(file) <- c(
  "v2_mean", "v2_median", "v2_SD", "v2_variance", "v2_Skew",
  "v3_mean", "v3_median", "v3_SD", "v3_variance", "v3_Skew"
)

# The final file will be saved in the working directory
write.csv(file, "file_stats.csv")