自动化功能以应用于R

时间:2018-06-25 14:37:55

标签: r list

我有许多包含数据帧的列表,并且我有一些代码可以执行我在命名其中一个列表时想要的功能。我想做的是使过程自动化,以便在我的环境中的每个列表上执行功能(环境还包含我不想影响的其他内容)。在创建新对象的地方,我还想根据列表名称自动命名它们。请注意,函数是在列表的每个元素内的一个变量上执行的,而不是在列表或整个变量上执行的。

在下面的代码中,M10210102.list是列表之一的名称。 INT / EXT都是列表元素中包含的变量之一。在下面的故障代码中,namesMCH0list是仅包含所有列表名称的列表。 ONT.list是通过拆分ONT.list从中创建所有较小列表的主列表。

我的问题:有没有办法有效地自动化功能?预先感谢!

以下是我要执行的功能:

PercChangeDiff <- lapply(M10210102.list, function(x) {

  INTdif <- c(NA, diff(x[["INTprice"]]))
  EXTdif <- c(NA, diff(x[["EXTprice"]]))
  INTperc <- (INTdif / x[["INTprice"]]) * 100
  EXTperc <- (EXTdif / x[["EXTprice"]]) * 100
  return(list(x[["WEEK"]], INTperc, EXTperc))
}
)

for(i in seq_along(PercChangeDiff)){
  names(PercChangeDiff[[i]]) <- c("WEEK","INTpercent", "EXTpercent")
}


#removing elements from list if they have fewer than 34 observations 
for (i in rev(seq_along(PercChangeDiff))){
  if (length(PercChangeDiff[[i]][["INTpercent"]]) < 34) (PercChangeDiff[[i] <- NULL)  
}

#removing elements from list if INTprice or EXTprice does not change
for (i in rev(seq_along(PercChangeDiff))){
  if (length(unique(PercChangeDiff[[i]][["INTpercent"]])) < 2) (PercChangeDiff[[i]] <- NULL) 
  if (length(unique(PercChangeDiff[[i]][["EXTpercent"]])) < 2) (PercChangeDiff[[i]] <- NULL) 
}

#############

####### AGGREGATING MEANS PER DATE FOR ALL ARTS WITHIN MCH0 #######

#removing first date 
for (i in seq_along(PercChangeDiff)){
  PercChangeDiff[[i]][["WEEK"]][[1]] <- NA
}

#aggregating means
library(tidyverse)
PercChangeAvg <- map(PercChangeDiff,as_tibble) %>%
  bind_rows %>%
  group_by(WEEK) %>%
  summarize_all(mean)


 PercChangeAvg <- PercChangeAvg[complete.cases(PercChangeAvg), ]

############### CREATING TIME SERIES ####################

#create a list of timeseries values for INT and EXT
timeINTavg <- ts(PercChangeAvg$INTpercent, frequency = 52, start = c(2017, 40), end = c(2018, 23))
timeEXTavg <- ts(PercChangeAvg$EXTpercent, frequency = 52, start = c(2017, 40), end = c(2018, 23))


#applying ccf to list of timeseries data
crossCorrAvg <- ccf(timeINTavg, timeEXTavg, lag.max = 100)

以下是我尝试使用第一个功能失败的内容:

for (g in seq_along(namesMCH0list)){
  lapply(get(namesMCH0list[g]), function(x) {

    INTdif <- c(NA, diff(x[["INTprice"]]))
    EXTdif <- c(NA, diff(x[["EXTprice"]]))
    INTperc <- (INTdif / x[["INTprice"]]) * 100
    EXTperc <- (EXTdif / x[["EXTprice"]]) * 100
    return( assign(paste("PercChangeDiff", paste(namesMCH0list[g]), sep = "."), list(x[["WEEK"]], INTperc, EXTperc)))
  }
  )
}

##this does not work ##
for (i in seq_along(ONT.list)){
  x <<- paste(namesMCH0list[i])
  for (g in rev(seq_along(get(x)))){
    if (length((get(x))[[g]][["INTprice"]]) < 20) ((get(x))[[g]] <<- NULL)  
  }
}


for (g in seq_along(ONT.list)){ 
  x <<- paste(namesMCH0list[g])
  lapply(paste(x), function(x) { 
  if (length(get(x)[["INTprice"]]) < 20) (NULL)
})
}


for(w in seq_along(ONT.list)){
  lapply(get(namesMCH0list[g]), function (x) {
    if(length(x[["INTprice"]] < 34 )) (x <- NULL)
  })
  NULL
}

数据示例(一个包含两个元素的列表):

 list(structure(list(WEEK = structure(c(17441, 17448, 17455, 17462, 17469, 17476, 17483, 17490, 17497, 17504, 17511, 17518, 17546, 17553, 17560, 17567, 17574, 17581, 17588, 17595, 17602, 17609, 17616, 17623, 17630, 17637, 17644, 17651, 17658, 17665, 17672, 17679, 17686, 17693), class = "Date"), REP_ART_UOM = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "20180929-EA", class = "factor"), MCH_0_CD = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "M10210101", class = "factor"), INTprice = c(3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97), EXTprice = c(4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48)), row.names = c(931L, 3626L, 6325L, 9021L, 11709L, 14368L, 17008L, 19764L, 22528L, 25193L, 27849L, 30489L, 33126L, 35769L, 38426L, 41141L, 44030L, 46911L, 49770L, 52643L, 55538L, 58423L, 61320L, 64256L, 67195L, 70117L, 73049L, 75982L, 78950L, 81924L, 84886L, 87848L, 90816L, 93778L), class = "data.frame"), 
structure(list(WEEK = structure(c(17441, 17448, 17455, 17462, 
17469, 17476, 17483, 17490, 17497, 17504, 17511, 17518, 17546, 
17553, 17560, 17567, 17574, 17581, 17588, 17595, 17602, 17609, 
17616, 17623, 17630, 17637, 17644, 17651, 17658, 17665, 17672, 
17679, 17686, 17693), class = "Date"), REP_ART_UOM = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = "20323113-EA", class = "factor"), MCH_0_CD = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = "M10210101", class = "factor"), INTprice = c(3.47, 
3.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47, 
3.47, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2.47, 2.47, 2.47, 
2.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47), EXTprice = c(2, 
2, 3.37, 3.37, 3.37, 3.37, 3.37, 3.37, 3.37, 2, 2, 2, 2, 
2.03, 2, 2, 3.37, 3.37, 3.37, 3.37, 3.37, 3.37, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2)), row.names = c(1138L, 3834L, 
6537L, 9232L, 11917L, 14547L, 17199L, 19956L, 22718L, 25381L, 
28036L, 30673L, 33312L, 35955L, 38609L, 41357L, 44247L, 47124L, 
49984L, 52859L, 55752L, 58636L, 61536L, 64470L, 67408L, 70330L, 
73262L, 76204L, 79171L, 82147L, 85107L, 88068L, 91035L, 93994L
), class = "data.frame"))

1 个答案:

答案 0 :(得分:0)

考虑使用内部数据框并从那里运行过滤。为了完全自动化,请在返回最后一个对象 crossCorrAvg 的主函数中概括您的过程。

master_function_process <- function(currlist) {
    PercChangeDiff <- lapply(currlist, function(x) {  
        # NEW DATAFRAME COLUMNS 
        x$INTdif <- c(NA, diff(x$INTprice))
        x$EXTdif <- c(NA, diff(x$EXTprice))
        x$INTpercent <- (x$INTdif / x$INTprice) * 100
        x$EXTpercent <- (x$EXTdif / x$EXTprice) * 100

        # DATAFRAME SUBSETTED COLUMNS
        tmp <- x[c("WEEK", "INTprice", "EXTprice", "INTpercent", "EXTpercent")]

        # FILTERS
        tmp <- tmp[tmp$INTpercent >= 34,]
        tmp <- tmp[tmp$INTprice >= 2 | tmp$EXTprice >= 2,]

        # REMOVE FIRST DATE
        tmp$WEEK[[1]] <- NA
        return(tmp)
   })

   # AVERAGE AGGREGATION BY WEEK (NO map)
   PercChangeAvg <- bind_rows(PercChangeDiff) %>%
        group_by(WEEK) %>%
        summarize_all(mean) %>%
        filter(complete.cases(.))

   # TIME SERIES VALUES FOR INT AND EXT
   timeINTavg <- ts(PercChangeAvg$INTpercent, frequency = 52, start = c(2017, 40), end = c(2018, 23))
   timeEXTavg <- ts(PercChangeAvg$EXTpercent, frequency = 52, start = c(2017, 40), end = c(2018, 23))

  # APPLY CCF AND ADD snames
  crossCorrAvg <- ccf(timeINTavg, timeEXTavg, lag.max = 100)
  crossCorrAvg <- lapply(crossCorrAvg, function(i) within(i, snames <- names(i))
}

# PROCESS ALL LISTS RETRIEVED WITH mget
master_crossCorrAvg_list <- lapply(mget(namesMCH0list), master_function_process)