将变量添加到嵌套列表

时间:2017-07-03 18:18:50

标签: r list nested

使用R base,我想将一个变量添加到嵌套列表中,其中变量为每个嵌套列表元素更改。以下是一个例子。谢谢。

#CREATE EXAMPLE DATAFRAME
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"), DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))

#CREATE NESTED LIST
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))), ]))

#PRINT NESTED LIST
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))

#I WOULD LIKE TO SIMPLIFY THIS PART
DF[[1]][[1]] <- within(DF[[1]][[1]], GROUP <- 2014)
DF[[1]][[2]] <- within(DF[[1]][[2]], GROUP <- 2015)

DF[[2]][[1]] <- within(DF[[2]][[1]], GROUP <- 2014)
DF[[2]][[2]] <- within(DF[[2]][[2]], GROUP <- 2015)

DF[[3]][[1]] <- within(DF[[3]][[1]], GROUP <- 2014)
DF[[3]][[2]] <- within(DF[[3]][[2]], GROUP <- 2015)

#PRINT MODIFIED NESTED LIST
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))

#I AM SURPRISED THE FOLLOWING DOES NOT WORK
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"), DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))),]))
DF <- lapply(DF, function(x) lapply(2014:2015, function(t) within(x, GROUP <- t)))
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))

6 个答案:

答案 0 :(得分:7)

这应该这样做

final_list<-list()
for(i in seq(1, length(DF))){

  new_list<-list()

  for(j in seq(1,length(DF[[i]]))){

    new_list[[j]]<-list(DF[[i]][[j]],GROUP=j)

  }
  final_list[[i]]<-new_list
}

答案 1 :(得分:7)

#CREATE EXAMPLE DATAFRAME
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"), DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))

#CREATE NESTED LIST
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))), ]))

#PRINT NESTED LIST
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))

#I WOULD LIKE TO SIMPLIFY THIS PART
DF[[1]][[1]] <- within(DF[[1]][[1]], GROUP <- 2014)
DF[[1]][[2]] <- within(DF[[1]][[2]], GROUP <- 2015)

DF[[2]][[1]] <- within(DF[[2]][[1]], GROUP <- 2014)
DF[[2]][[2]] <- within(DF[[2]][[2]], GROUP <- 2015)

DF[[3]][[1]] <- within(DF[[3]][[1]], GROUP <- 2014)
DF[[3]][[2]] <- within(DF[[3]][[2]], GROUP <- 2015)

#PRINT MODIFIED NESTED LIST
DF1 <- lapply(DF, lapply, function(x) rbind(head(x), tail(x)))
DF1

#I AM SURPRISED THE FOLLOWING DOES NOT WORK
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"), DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))),]))

GROUPS <- c(2014:2015)    

DF <- lapply(DF, function(xs) lapply(1:2, function(t) within(xs[[t]], GROUP <- GROUPS[t])))

DF2 <- lapply(DF, lapply, function(x) rbind(head(x), tail(x)))
DF2

all.equal(DF1, DF2)

答案 2 :(得分:4)

这个怎么样? 必须使用包dplyr,因为它可以使用mutate_更轻松地向数据框添加新变量。

library(dplyr) # if not installed, install with install.packages("dplyr")
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"), 
DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))

#CREATE NESTED LIST
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, 
as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + 
i, "12", "31", sep = "-"))), ]))

# loop over the first list with lapply and then loop over the nested lists 
# and the desired GROUP values with mapply
DF <- lapply(DF, function(x) mapply(FUN = function(df,number){mutate_(df, 
"GROUP" = number)},x, 2014:2015, SIMPLIFY = F))

#PRINT NESTED LIST
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))

至于为什么你的方式不起作用:想想function(t) within(xs[[t]], GROUP <- GROUPS[t])的作用。它不会返回数据帧。

答案 3 :(得分:3)

这也可以使用Map来实现cbind日期。即,

lapply(DF, function(i) Map(cbind, i, c(2014, 2015)))

#or to set the name of that column to 'id',

lapply(DF, function(i) Map(function(x, y) cbind(x, id = y), i, c(2014, 2015)))

但是,如果您不介意展平该列表并保留额外的id变量,则可以使用tidyverse来展平并通过从DATE的第一个元素中抓取年份来创建年份ID来自每个数据框,即

library(tidyverse)

new_df <- DF %>% 
  flatten_df(.id = 'list_id') %>% 
  group_by(list_id) %>% 
  mutate(id = sub('-.*', '', DATE[1]))

#which will give,

# A tibble: 19,725 x 4
# Groups:   list_id [6]
#   list_id   NAME       DATE    id
#     <chr> <fctr>     <date> <chr>
# 1       1  FRANK 2014-01-01  2014
# 2       1   TONY 2014-01-01  2014
# 3       1     ED 2014-01-01  2014
# 4       1  FRANK 2014-01-02  2014
# 5       1   TONY 2014-01-02  2014
# 6       1     ED 2014-01-02  2014
# 7       1  FRANK 2014-01-03  2014
# 8       1   TONY 2014-01-03  2014
# 9       1     ED 2014-01-03  2014
#10       1  FRANK 2014-01-04  2014
# ... with 19,715 more rows

答案 4 :(得分:2)

我认为问题可能是第二个lapply

DF <- lapply(DF, function(x) lapply(2014:2015, function(t) within(x, GROUP <- t)))

lapply似乎没有从大列表对象中提取所需的组件。第一个lapply遍历列表的顶层,每次都提取一个双元素列表对象x。然后第二个lapply遍历一个向量,每次都提供一个标量向量t。因此,下一部分每次都会得到一个两元素列表(x),而不是所需的(未命名的)数据帧。

解决方案

如果已经创建了对象,则可以直接遍历元素而不是索引列表元素。

#CREATE NESTED LIST
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))), ]))

edit_level2 <- function(df) {
    # figure out what the value of t should be based on the data.
    t <- as.integer(format(min(df$DATE), "%Y"))
    df$GROUP <- t
    return(df)
}

# iterate over the list object contents at *both* levels
DF <- lapply(DF, function(level1) lapply(level1, function(level2) edit_level2(level2)))

注意:这类似于@Consistency在评论中提出的解决方案 - 提取数据框是个问题。

替代

如果你可以改变产生列表对象的代码,我建议在创建列表对象时分配变量,而不是在之后修改它(我在编辑之前的原始建议)。

#CREATE EXAMPLE DATAFRAME
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"), 
                  DATE = seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))

#CREATE NESTED LIST
DF <- lapply(1:3, function(i) {

    lapply(2014:2015, function(t) {

        first <- as.Date(paste(t,     "01", "01", sep = "-")) 
        last  <- as.Date(paste(t + i, "12", "31", sep = "-")) 

        # create a local data frame
        df <- DF[first <= DF$DATE & DF$DATE <= last, ]

        # modify the local data frame
        df$GROUP <- t

        # return the modified data frame
        df

    })

})

答案 5 :(得分:2)

好的,我想我已经明白了。至少all.equal会返回TRUE,但identical则不会。在这里。请注意,我已经更改了你的df名称。

#CREATE EXAMPLE DATAFRAME
DF <- expand.grid(NAME = c("FRANK", "TONY", "ED"), DATE =    seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day"))

#CREATE NESTED LIST
DF <- lapply(1:3, function(i) lapply(2014:2015, function(t) DF[with(DF, as.Date(paste(t, "01", "01", sep = "-")) <= DATE & DATE <= as.Date(paste(t + i, "12", "31", sep = "-"))), ]))

#PRINT NESTED LIST
lapply(DF, lapply, function(x) rbind(head(x), tail(x)))

DF2 <- DF
#I WOULD LIKE TO SIMPLIFY THIS PART
DF2[[1]][[1]] <- within(DF2[[1]][[1]], GROUP <- 2014)
DF2[[1]][[2]] <- within(DF2[[1]][[2]], GROUP <- 2015)

DF2[[2]][[1]] <- within(DF2[[2]][[1]], GROUP <- 2014)
DF2[[2]][[2]] <- within(DF2[[2]][[2]], GROUP <- 2015)

DF2[[3]][[1]] <- within(DF2[[3]][[1]], GROUP <- 2014)
DF2[[3]][[2]] <- within(DF2[[3]][[2]], GROUP <- 2015)

#PRINT MODIFIED NESTED LIST
lapply(DF2, lapply, function(x) rbind(head(x), tail(x)))

### New code
DF3 <- DF
DF3 <- lapply(DF3, function(x) {
        lapply(2014:2015, function(t){
            within(x[[t - 2013]], GROUP <- t)
        })
    })
identical(DF2, DF3)
all.equal(DF2, DF3)