我是R的新手。我的目录包含许多具有相同结构的数据框文件(3列用","分隔),如下所示: 日期和时间戳,V2,V3
我应该计算所有文件的V2和V3列的均值,中值,SD,方差和偏度,然后将这10列保存在单独的文件中。每列(例如,V2中位数)将包含所有文件中所有V2列的中位数。我需要你的帮助。
谢谢
答案 0 :(得分:1)
如果我对你的问题理解正确,下面是一个使用 data.table 的快速解决方案。
# Load library
library(data.table)
library(moments)
library(readr)
# Folder that holds the input files.
data_dir <- "C:/your/folder"

# Get a list of `.csv` files in your folder. `pattern` is a regular
# expression, not a shell glob, so the extension is anchored with "\\.csv$";
# the glob-style "*.csv" would also match names such as "mycsvdata.txt".
filenames <- list.files(data_dir, pattern = "\\.csv$", full.names = TRUE)

# Fail early with a clear message instead of an obscure "object 'V2' not
# found" error further down when the folder has no matching files.
if (length(filenames) == 0) {
  stop("No .csv files found in ", data_dir, call. = FALSE)
}

# Load and bind all data sets into one single data.table
df <- rbindlist(lapply(filenames, fread))

# Run the summary statistics for V2 and V3 over the pooled rows of all files.
# The result is a one-row table with ten columns. na.rm = TRUE keeps a single
# missing value from turning a whole statistic into NA.
output <- df[, .(
  V2_mean = mean(V2, na.rm = TRUE),
  V2_median = median(V2, na.rm = TRUE),
  V2_SD = sd(V2, na.rm = TRUE),
  V2_var = var(V2, na.rm = TRUE),
  V2_skw = skewness(V2, na.rm = TRUE),
  V3_mean = mean(V3, na.rm = TRUE),
  V3_median = median(V3, na.rm = TRUE),
  V3_SD = sd(V3, na.rm = TRUE),
  V3_var = var(V3, na.rm = TRUE),
  V3_skw = skewness(V3, na.rm = TRUE)
)]

# Save output in a different file (relative to the current working directory)
write_csv(output, "output.csv")
答案 1 :(得分:0)
还有一种方法:
#This library is needed to compute skewness
library(e1071)
# Directory where your files are present. It is passed to the file functions
# directly instead of calling setwd(), so the session's working directory is
# left untouched.
data_dir <- "Directory where your files are present"

# Name of the result file; it is written into `data_dir` as well.
output_name <- "file_stats.csv"

# File names will be stored here. The dot is escaped so that only a real
# ".csv" extension (any letter case) matches.
files <- list.files(path = data_dir, pattern = "\\.CSV$", ignore.case = TRUE)

# The result file lives in the same directory and has a .csv extension too;
# drop it so that a re-run does not read the previous output as input data.
files <- files[files != output_name]

if (length(files) == 0) {
  stop("No .csv files found in ", data_dir, call. = FALSE)
}

# Compute the ten summary statistics for one file.
# `path` is the path to a comma-separated file with a header row; the
# statistics are taken from its 2nd and 3rd columns, ignoring NA values.
# Returns a one-row data frame with one column per statistic.
summarise_file <- function(path) {
  current_file <- read.table(path, header = TRUE, sep = ",")
  v2 <- current_file[[2]]
  v3 <- current_file[[3]]
  data.frame(
    v2_mean = mean(v2, na.rm = TRUE),
    v2_median = median(v2, na.rm = TRUE),
    v2_SD = sd(v2, na.rm = TRUE),
    v2_variance = var(v2, na.rm = TRUE),
    v2_Skew = skewness(v2, na.rm = TRUE),
    v3_mean = mean(v3, na.rm = TRUE),
    v3_median = median(v3, na.rm = TRUE),
    v3_SD = sd(v3, na.rm = TRUE),
    v3_variance = var(v3, na.rm = TRUE),
    v3_Skew = skewness(v3, na.rm = TRUE)
  )
}

# One row per input file, in the order returned by list.files(). The rows are
# built first and bound once, rather than growing a data frame in a loop.
file <- do.call(rbind, lapply(file.path(data_dir, files), summarise_file))

# Final file will be saved in the data directory. row.names = FALSE keeps the
# output to exactly the ten statistic columns.
write.csv(file, file.path(data_dir, output_name), row.names = FALSE)