提前致谢。
我有一个来自 list.files 的文件路径列表。我试图调用每个文件并标准化列数,以便我可以将每个文件绑定到一个大 DF。
Team 是我的文件路径列表,代码如下(注意 Add_X_Col 是用于标准化列数的函数):
# Load the first CSV listed in Team; its first column supplies the row names.
x1 <- read.csv(Team[1], row.names = 1)
# The column count decides which padding helper, if any, is applied.
y <- ncol(x1)
# Standardise the number of columns so the frames can be bound; any other
# column count leaves x1 untouched.
x1 <- switch(
  as.character(y),
  "37" = Add_5_Col(x1),
  "38" = Add_4_Col(x1),
  "39" = Add_3_Col(x1),
  x1
)
# Start the combined data frame from this first file.
Team_Split <- x1
# Drop the path that has just been processed from the list.
Team <- Team[-1]
第二部分——我必须对该目录中的所有文件重复以下步骤约 275 次:
# Load the CSV now at the head of Team; its first column supplies the row names.
x1 <- read.csv(Team[1], row.names = 1)
# The column count decides which padding helper, if any, is applied.
y <- ncol(x1)
# Standardise the number of columns so the frame can be bound; any other
# column count leaves x1 untouched.
x1 <- switch(
  as.character(y),
  "37" = Add_5_Col(x1),
  "38" = Add_4_Col(x1),
  "39" = Add_3_Col(x1),
  x1
)
# Append this file's rows to the Team_Split built so far.
Team_Split <- rbind(Team_Split, x1)
# Drop the path that has just been processed from the list.
Team <- Team[-1]
我期望的输出是一个大型数据框:将该路径下的每个文件读取为数据框,再把它们按行绑定在一起。
我不熟悉循环和 apply 系列函数;我曾尝试理解并在此处使用它们,但没有成功!任何帮助都将不胜感激。
按要求编辑。
#' Pad a 39-column data frame to the 42-column layout
#'
#' Renames any existing `X60` column to `X60.1`, appends zero-filled `X60`,
#' `X105` and `X110` columns, and moves them to positions 13, 26 and 27.
#'
#' @param x A data frame with exactly 39 columns.
#' @return `x` widened to 42 columns in the reordered layout.
Add_3_Col <- function(x) {
  # Operate on the argument: the body used to read a global `x1`, so the
  # function ignored whatever data frame was actually passed in.
  stopifnot(is.data.frame(x), ncol(x) == 39L)
  names(x)[names(x) == "X60"] <- "X60.1"
  # rep() keeps the assignments valid for zero-row inputs too.
  zeros <- rep(0, nrow(x))
  x$X60 <- zeros
  x$X105 <- zeros
  x$X110 <- zeros
  # Columns 40:42 are the three just appended; slot them into place.
  x[c(1:12, 40, 13:24, 41, 42, 25:39)]
}
#' Pad a 37-column data frame to the 42-column layout
#'
#' Renames any existing `X55` / `X60` columns to `X55.1` / `X60.1`, appends
#' zero-filled `X55`, `X60`, `X100`, `X105` and `X110` columns, and moves
#' them to positions 12, 13, 25, 26 and 27.
#'
#' @param x A data frame with exactly 37 columns.
#' @return `x` widened to 42 columns in the reordered layout.
Add_5_Col <- function(x) {
  # Operate on the argument: the body used to read a global `x1`, so the
  # function ignored whatever data frame was actually passed in.
  stopifnot(is.data.frame(x), ncol(x) == 37L)
  names(x)[names(x) == "X55"] <- "X55.1"
  names(x)[names(x) == "X60"] <- "X60.1"
  # rep() keeps the assignments valid for zero-row inputs too.
  zeros <- rep(0, nrow(x))
  x$X55 <- zeros
  x$X60 <- zeros
  x$X100 <- zeros
  x$X105 <- zeros
  x$X110 <- zeros
  # Columns 38:42 are the five just appended; slot them into place.
  x[c(1:11, 38, 39, 12:22, 40, 41, 42, 23:37)]
}
#' Pad a 38-column data frame to the 42-column layout
#'
#' Renames any existing `X55` / `X60` columns to `X55.1` / `X60.1`, appends
#' zero-filled `X55`, `X60`, `X105` and `X110` columns, and moves them to
#' positions 12, 13, 26 and 27.
#'
#' @param x A data frame with exactly 38 columns.
#' @return `x` widened to 42 columns in the reordered layout.
Add_4_Col <- function(x) {
  # Operate on the argument: the body used to read a global `x1`, so the
  # function ignored whatever data frame was actually passed in.
  stopifnot(is.data.frame(x), ncol(x) == 38L)
  names(x)[names(x) == "X55"] <- "X55.1"
  names(x)[names(x) == "X60"] <- "X60.1"
  # rep() keeps the assignments valid for zero-row inputs too.
  zeros <- rep(0, nrow(x))
  x$X55 <- zeros
  x$X60 <- zeros
  x$X105 <- zeros
  x$X110 <- zeros
  # Columns 39:42 are the four just appended; slot them into place.
  x[c(1:11, 39, 40, 12:23, 41, 42, 24:38)]
}
答案 0 :(得分:1)
我用一批包含模拟数据的 CSV 文件测试过,这段代码对我有效。
# List every file in the target folder with its full path.
Team <- list.files("c:\\Test\\Teams\\", full.names = TRUE)
print(Team)

# One slot per file: the frames are bound once after the loop instead of
# growing Team_Split with rbind() on every iteration.
Team_Frames <- vector("list", length(Team))
for (i in seq_along(Team)) {
  Team_File <- Team[[i]]
  # The same variable (`x1`) is now read, padded and stored; previously the
  # file was read into `xl` while the padding was applied to `x1`, so the
  # unpadded data was what got bound.
  # NOTE(review): row.names = 1 assumes the first CSV column holds row labels
  # and that the 37/38/39 counts below exclude it — confirm against the files.
  x1 <- read.csv(Team_File, row.names = 1)
  y <- ncol(x1)
  # Standardise the number of columns so every frame can be bound.
  if (y == 37L) {
    x1 <- Add_5_Col(x1)
  } else if (y == 38L) {
    x1 <- Add_4_Col(x1)
  } else if (y == 39L) {
    x1 <- Add_3_Col(x1)
  }
  print(x1)
  Team_Frames[[i]] <- x1
}

# An empty folder yields an empty data frame rather than NULL.
Team_Split <- if (length(Team_Frames) > 0L) {
  do.call(rbind, Team_Frames)
} else {
  data.frame()
}
print(Team_Split)
答案 1 :(得分:0)
My intended output would be a large dataframe, reading each file in the file path as a dataframe and binding them together.
取决于文件在您的目录中的位置:
library(readr)
library(dplyr)
# Full paths of every .csv file directly inside data/.
files <- dir("data/", pattern = "\\.csv$", full.names = TRUE)
# Read each file, keeping the results in a list keyed by file path.
df2_list <- lapply(files, read_csv)
names(df2_list) <- files
# Stack all frames into one; bind_rows() comes from dplyr.
df2 <- bind_rows(df2_list)
然后是这样的?
library(dplyr)
# Apply na.omit() to every column of df2 through summarise_each().
# NOTE(review): summarise_each() and funs() are deprecated in recent dplyr
# releases — confirm against the installed version before relying on this.
df2 %>% summarise_each(funs(na.omit(.)))
此解决方案在我的机器上合并了 3 个列数不同的 .csv 文件,这些文件都位于工作目录下的 data 文件夹中。
df2 %>% summarise_each(funs(na.omit(.)))
用于处理合并后产生的 NA。