我有许多包含数据帧的列表,并且我有一些代码可以执行我在命名其中一个列表时想要的功能。我想做的是使过程自动化,以便在我的环境中的每个列表上执行功能(环境还包含我不想影响的其他内容)。在创建新对象的地方,我还想根据列表名称自动命名它们。请注意,函数是在列表的每个元素内的一个变量上执行的,而不是在列表或整个变量上执行的。
在下面的代码中,M10210102.list
是列表之一的名称。 INT
/ EXT
都是列表元素中包含的变量之一。在下面的故障代码中,namesMCH0list
是仅包含所有列表名称的列表。 ONT.list
是通过拆分ONT.list从中创建所有较小列表的主列表。
我的问题:有没有办法有效地自动化功能?预先感谢!
以下是我要执行的功能:
PercChangeDiff <- lapply(M10210102.list, function(x) {
INTdif <- c(NA, diff(x[["INTprice"]]))
EXTdif <- c(NA, diff(x[["EXTprice"]]))
INTperc <- (INTdif / x[["INTprice"]]) * 100
EXTperc <- (EXTdif / x[["EXTprice"]]) * 100
return(list(x[["WEEK"]], INTperc, EXTperc))
}
)
for(i in seq_along(PercChangeDiff)){
names(PercChangeDiff[[i]]) <- c("WEEK","INTpercent", "EXTpercent")
}
#removing elements from list if they have fewer than 34 observations
for (i in rev(seq_along(PercChangeDiff))){
if (length(PercChangeDiff[[i]][["INTpercent"]]) < 34) (PercChangeDiff[[i] <- NULL)
}
#removing elements from list if INTprice or EXTprice does not change
for (i in rev(seq_along(PercChangeDiff))){
if (length(unique(PercChangeDiff[[i]][["INTpercent"]])) < 2) (PercChangeDiff[[i]] <- NULL)
if (length(unique(PercChangeDiff[[i]][["EXTpercent"]])) < 2) (PercChangeDiff[[i]] <- NULL)
}
#############
####### AGGREGATING MEANS PER DATE FOR ALL ARTS WITHIN MCH0 #######
#removing first date
for (i in seq_along(PercChangeDiff)){
PercChangeDiff[[i]][["WEEK"]][[1]] <- NA
}
#aggregating means
library(tidyverse)
PercChangeAvg <- map(PercChangeDiff,as_tibble) %>%
bind_rows %>%
group_by(WEEK) %>%
summarize_all(mean)
PercChangeAvg <- PercChangeAvg[complete.cases(PercChangeAvg), ]
############### CREATING TIME SERIES ####################
#create a list of timeseries values for INT and EXT
timeINTavg <- ts(PercChangeAvg$INTpercent, frequency = 52, start = c(2017, 40), end = c(2018, 23))
timeEXTavg <- ts(PercChangeAvg$EXTpercent, frequency = 52, start = c(2017, 40), end = c(2018, 23))
#applying ccf to list of timeseries data
crossCorrAvg <- ccf(timeINTavg, timeEXTavg, lag.max = 100)
以下是我尝试使用第一个功能失败的内容:
for (g in seq_along(namesMCH0list)){
lapply(get(namesMCH0list[g]), function(x) {
INTdif <- c(NA, diff(x[["INTprice"]]))
EXTdif <- c(NA, diff(x[["EXTprice"]]))
INTperc <- (INTdif / x[["INTprice"]]) * 100
EXTperc <- (EXTdif / x[["EXTprice"]]) * 100
return( assign(paste("PercChangeDiff", paste(namesMCH0list[g]), sep = "."), list(x[["WEEK"]], INTperc, EXTperc)))
}
)
}
##this does not work ##
for (i in seq_along(ONT.list)){
x <<- paste(namesMCH0list[i])
for (g in rev(seq_along(get(x)))){
if (length((get(x))[[g]][["INTprice"]]) < 20) ((get(x))[[g]] <<- NULL)
}
}
for (g in seq_along(ONT.list)){
x <<- paste(namesMCH0list[g])
lapply(paste(x), function(x) {
if (length(get(x)[["INTprice"]]) < 20) (NULL)
})
}
for(w in seq_along(ONT.list)){
lapply(get(namesMCH0list[g]), function (x) {
if(length(x[["INTprice"]] < 34 )) (x <- NULL)
})
NULL
}
数据示例(一个包含两个元素的列表):
list(structure(list(WEEK = structure(c(17441, 17448, 17455, 17462, 17469, 17476, 17483, 17490, 17497, 17504, 17511, 17518, 17546, 17553, 17560, 17567, 17574, 17581, 17588, 17595, 17602, 17609, 17616, 17623, 17630, 17637, 17644, 17651, 17658, 17665, 17672, 17679, 17686, 17693), class = "Date"), REP_ART_UOM = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "20180929-EA", class = "factor"), MCH_0_CD = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "M10210101", class = "factor"), INTprice = c(3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97, 3.97), EXTprice = c(4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48, 4.48)), row.names = c(931L, 3626L, 6325L, 9021L, 11709L, 14368L, 17008L, 19764L, 22528L, 25193L, 27849L, 30489L, 33126L, 35769L, 38426L, 41141L, 44030L, 46911L, 49770L, 52643L, 55538L, 58423L, 61320L, 64256L, 67195L, 70117L, 73049L, 75982L, 78950L, 81924L, 84886L, 87848L, 90816L, 93778L), class = "data.frame"),
structure(list(WEEK = structure(c(17441, 17448, 17455, 17462,
17469, 17476, 17483, 17490, 17497, 17504, 17511, 17518, 17546,
17553, 17560, 17567, 17574, 17581, 17588, 17595, 17602, 17609,
17616, 17623, 17630, 17637, 17644, 17651, 17658, 17665, 17672,
17679, 17686, 17693), class = "Date"), REP_ART_UOM = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = "20323113-EA", class = "factor"), MCH_0_CD = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = "M10210101", class = "factor"), INTprice = c(3.47,
3.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47,
3.47, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2.47, 2.47, 2.47,
2.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47, 3.47), EXTprice = c(2,
2, 3.37, 3.37, 3.37, 3.37, 3.37, 3.37, 3.37, 2, 2, 2, 2,
2.03, 2, 2, 3.37, 3.37, 3.37, 3.37, 3.37, 3.37, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2)), row.names = c(1138L, 3834L,
6537L, 9232L, 11917L, 14547L, 17199L, 19956L, 22718L, 25381L,
28036L, 30673L, 33312L, 35955L, 38609L, 41357L, 44247L, 47124L,
49984L, 52859L, 55752L, 58636L, 61536L, 64470L, 67408L, 70330L,
73262L, 76204L, 79171L, 82147L, 85107L, 88068L, 91035L, 93994L
), class = "data.frame"))
答案 0 :(得分:0)
考虑使用内部数据框并从那里运行过滤。为了完全自动化,请在返回最后一个对象 crossCorrAvg 的主函数中概括您的过程。
master_function_process <- function(currlist) {
PercChangeDiff <- lapply(currlist, function(x) {
# NEW DATAFRAME COLUMNS
x$INTdif <- c(NA, diff(x$INTprice))
x$EXTdif <- c(NA, diff(x$EXTprice))
x$INTpercent <- (x$INTdif / x$INTprice) * 100
x$EXTpercent <- (x$EXTdif / x$EXTprice) * 100
# DATAFRAME SUBSETTED COLUMNS
tmp <- x[c("WEEK", "INTprice", "EXTprice", "INTpercent", "EXTpercent")]
# FILTERS
tmp <- tmp[tmp$INTpercent >= 34,]
tmp <- tmp[tmp$INTprice >= 2 | tmp$EXTprice >= 2,]
# REMOVE FIRST DATE
tmp$WEEK[[1]] <- NA
return(tmp)
})
# AVERAGE AGGREGATION BY WEEK (NO map)
PercChangeAvg <- bind_rows(PercChangeDiff) %>%
group_by(WEEK) %>%
summarize_all(mean) %>%
filter(complete.cases(.))
# TIME SERIES VALUES FOR INT AND EXT
timeINTavg <- ts(PercChangeAvg$INTpercent, frequency = 52, start = c(2017, 40), end = c(2018, 23))
timeEXTavg <- ts(PercChangeAvg$EXTpercent, frequency = 52, start = c(2017, 40), end = c(2018, 23))
# APPLY CCF AND ADD snames
crossCorrAvg <- ccf(timeINTavg, timeEXTavg, lag.max = 100)
crossCorrAvg <- lapply(crossCorrAvg, function(i) within(i, snames <- names(i))
}
# PROCESS ALL LISTS RETRIEVED WITH mget
master_crossCorrAvg_list <- lapply(mget(namesMCH0list), master_function_process)