使用R base,我想将一个变量添加到嵌套列表中,其中变量为每个嵌套列表元素更改。以下是一个例子。谢谢。
#CREATE EXAMPLE DATAFRAME
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"), DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))
#CREATE NESTED LIST
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))), ]))
#PRINT NESTED LIST
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))
#I WOULD LIKE TO SIMPLIFY THIS PART
DF[[1]][[1]] <- within(DF[[1]][[1]], GROUP <- 2014)
DF[[1]][[2]] <- within(DF[[1]][[2]], GROUP <- 2015)
DF[[2]][[1]] <- within(DF[[2]][[1]], GROUP <- 2014)
DF[[2]][[2]] <- within(DF[[2]][[2]], GROUP <- 2015)
DF[[3]][[1]] <- within(DF[[3]][[1]], GROUP <- 2014)
DF[[3]][[2]] <- within(DF[[3]][[2]], GROUP <- 2015)
#PRINT MODIFIED NESTED LIST
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))
#I AM SURPRISED THE FOLLOWING DOES NOT WORK
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"), DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))),]))
DF <- lapply(DF, function(x) lapply(2014:2015, function(t) within(x, GROUP <- t)))
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))
答案 0 :(得分:7)
这应该这样做
final_list<-list()
for(i in seq(1, length(DF))){
new_list<-list()
for(j in seq(1,length(DF[[i]]))){
new_list[[j]]<-list(DF[[i]][[j]],GROUP=j)
}
final_list[[i]]<-new_list
}
答案 1 :(得分:7)
#CREATE EXAMPLE DATAFRAME
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"), DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))
#CREATE NESTED LIST
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))), ]))
#PRINT NESTED LIST
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))
#I WOULD LIKE TO SIMPLIFY THIS PART
DF[[1]][[1]] <- within(DF[[1]][[1]], GROUP <- 2014)
DF[[1]][[2]] <- within(DF[[1]][[2]], GROUP <- 2015)
DF[[2]][[1]] <- within(DF[[2]][[1]], GROUP <- 2014)
DF[[2]][[2]] <- within(DF[[2]][[2]], GROUP <- 2015)
DF[[3]][[1]] <- within(DF[[3]][[1]], GROUP <- 2014)
DF[[3]][[2]] <- within(DF[[3]][[2]], GROUP <- 2015)
#PRINT MODIFIED NESTED LIST
DF1 <- lapply(DF, lapply, function(x) rbind(head(x), tail(x)))
DF1
#I AM SURPRISED THE FOLLOWING DOES NOT WORK
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"), DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))),]))
GROUPS <- c(2014:2015)
DF <- lapply(DF, function(xs) lapply(1:2, function(t) within(xs[[t]], GROUP <- GROUPS[t])))
DF2 <- lapply(DF, lapply, function(x) rbind(head(x), tail(x)))
DF2
all.equal(DF1, DF2)
答案 2 :(得分:4)
这个怎么样?
必须使用包dplyr
,因为它可以使用mutate_
更轻松地向数据框添加新变量。
library(dplyr) # if not installed, install with install.packages("dplyr")
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"),
DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))
#CREATE NESTED LIST
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF,
as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t +
i, "12", "31", sep = "-"))), ]))
# loop over the first list with lapply and then loop over the nested lists
# and the desired GROUP values with mapply
DF <- lapply(DF, function(x) mapply(FUN = function(df,number){mutate_(df,
"GROUP" = number)},x, 2014:2015, SIMPLIFY = F))
#PRINT NESTED LIST
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))
至于为什么你的方式不起作用:想想function(t) within(xs[[t]], GROUP <- GROUPS[t])
的作用。它不会返回数据帧。
答案 3 :(得分:3)
这也可以使用Map
来实现cbind
日期。即,
lapply(DF, function(i) Map(cbind, i, c(2014, 2015)))
#or to set the name of that column to 'id',
lapply(DF, function(i) Map(function(x, y) cbind(x, id = y), i, c(2014, 2015)))
但是,如果您不介意展平该列表并保留额外的id变量,则可以使用tidyverse
来展平并通过从DATE
的第一个元素中抓取年份来创建年份ID来自每个数据框,即
library(tidyverse)
new_df <- DF %>%
flatten_df(.id = 'list_id') %>%
group_by(list_id) %>%
mutate(id = sub('-.*', '', DATE[1]))
#which will give,
# A tibble: 19,725 x 4
# Groups: list_id [6]
# list_id NAME DATE id
# <chr> <fctr> <date> <chr>
# 1 1 FRANK 2014-01-01 2014
# 2 1 TONY 2014-01-01 2014
# 3 1 ED 2014-01-01 2014
# 4 1 FRANK 2014-01-02 2014
# 5 1 TONY 2014-01-02 2014
# 6 1 ED 2014-01-02 2014
# 7 1 FRANK 2014-01-03 2014
# 8 1 TONY 2014-01-03 2014
# 9 1 ED 2014-01-03 2014
#10 1 FRANK 2014-01-04 2014
# ... with 19,715 more rows
答案 4 :(得分:2)
我认为问题可能是第二个lapply
:
DF <- lapply(DF, function(x) lapply(2014:2015, function(t) within(x, GROUP <- t)))
lapply
似乎没有从大列表对象中提取所需的组件。第一个lapply
遍历列表的顶层,每次都提取一个双元素列表对象x
。然后第二个lapply
遍历一个向量,每次都提供一个标量向量t
。因此,下一部分每次都会得到一个两元素列表(x
),而不是所需的(未命名的)数据帧。
如果已经创建了对象,则可以直接遍历元素而不是索引列表元素。
#CREATE NESTED LIST
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))), ]))
edit_level2 <- function(df) {
# figure out what the value of t should be based on the data.
t <- as.integer(format(min(df$DATE), "%Y"))
df$GROUP <- t
return(df)
}
# iterate over the list object contents at *both* levels
DF <- lapply(DF, function(level1) lapply(level1, function(level2) edit_level2(level2)))
注意:这类似于@Consistency在评论中提出的解决方案 - 提取数据框是个问题。
如果你可以改变产生列表对象的代码,我建议在创建列表对象时分配变量,而不是在之后修改它(我在编辑之前的原始建议)。
#CREATE EXAMPLE DATAFRAME
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"),
DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))
#CREATE NESTED LIST
DF <- lapply(1:3, function(i) {
lapply(2014:2015, function(t) {
first <- as.Date(paste(t, "01", "01", sep = "-"))
last <- as.Date(paste(t + i, "12", "31", sep = "-"))
# create a local data frame
df <- DF[first <= DF$DATE & DF$DATE <= last, ]
# modify the local data frame
df$GROUP <- t
# return the modified data frame
df
})
})
答案 5 :(得分:2)
all.equal
会返回TRUE
,但identical
则不会。在这里。请注意,我已经更改了你的df名称。
#CREATE EXAMPLE DATAFRAME
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"), DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))
#CREATE NESTED LIST
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))), ]))
#PRINT NESTED LIST
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))
DF2 <- DF
#I WOULD LIKE TO SIMPLIFY THIS PART
DF2[[1]][[1]] <- within(DF2[[1]][[1]], GROUP <- 2014)
DF2[[1]][[2]] <- within(DF2[[1]][[2]], GROUP <- 2015)
DF2[[2]][[1]] <- within(DF2[[2]][[1]], GROUP <- 2014)
DF2[[2]][[2]] <- within(DF2[[2]][[2]], GROUP <- 2015)
DF2[[3]][[1]] <- within(DF2[[3]][[1]], GROUP <- 2014)
DF2[[3]][[2]] <- within(DF2[[3]][[2]], GROUP <- 2015)
#PRINT MODIFIED NESTED LIST
lapply(DF2, lapply, function(x) rbind(head(x), tail(x)))
### New code
DF3 <- DF
DF3 <- lapply(DF3, function(x) {
lapply(2014:2015, function(t){
within(x[[t - 2013]], GROUP <- t)
})
})
identical(DF2, DF3)
all.equal(DF2, DF3)