我一直在绞尽脑汁想弄清楚如何减少所需的代码量
这是我的代码。我知道必须有更好的方法。
# Coerce the OPEID column of every yearly table to character,
# one statement per table (1996.97 through 2015.16).
MERGED1996.97.PP$OPEID<-as.character(MERGED1996.97.PP$OPEID)
MERGED1997.98.PP$OPEID<-as.character(MERGED1997.98.PP$OPEID)
MERGED1998.99.PP$OPEID<-as.character(MERGED1998.99.PP$OPEID)
MERGED1999.00.PP$OPEID<-as.character(MERGED1999.00.PP$OPEID)
MERGED2000.01.PP$OPEID<-as.character(MERGED2000.01.PP$OPEID)
MERGED2001.02.PP$OPEID<-as.character(MERGED2001.02.PP$OPEID)
MERGED2002.03.PP$OPEID<-as.character(MERGED2002.03.PP$OPEID)
MERGED2003.04.PP$OPEID<-as.character(MERGED2003.04.PP$OPEID)
MERGED2004.05.PP$OPEID<-as.character(MERGED2004.05.PP$OPEID)
MERGED2005.06.PP$OPEID<-as.character(MERGED2005.06.PP$OPEID)
MERGED2006.07.PP$OPEID<-as.character(MERGED2006.07.PP$OPEID)
MERGED2007.08.PP$OPEID<-as.character(MERGED2007.08.PP$OPEID)
MERGED2008.09.PP$OPEID<-as.character(MERGED2008.09.PP$OPEID)
MERGED2009.10.PP$OPEID<-as.character(MERGED2009.10.PP$OPEID)
MERGED2010.11.PP$OPEID<-as.character(MERGED2010.11.PP$OPEID)
MERGED2011.12.PP$OPEID<-as.character(MERGED2011.12.PP$OPEID)
MERGED2012.13.PP$OPEID<-as.character(MERGED2012.13.PP$OPEID)
MERGED2013.14.PP$OPEID<-as.character(MERGED2013.14.PP$OPEID)
MERGED2014.15.PP$OPEID<-as.character(MERGED2014.15.PP$OPEID)
MERGED2015.16.PP$OPEID<-as.character(MERGED2015.16.PP$OPEID)
# Key every yearly table on OPEID, one call per table.
# data.table::setkey() sorts the table by the key column and marks it as
# keyed, modifying the table by reference (no reassignment needed).
setkey(MERGED1996.97.PP, "OPEID")
setkey(MERGED1997.98.PP, "OPEID")
setkey(MERGED1998.99.PP, "OPEID")
setkey(MERGED1999.00.PP, "OPEID")
setkey(MERGED2000.01.PP, "OPEID")
setkey(MERGED2001.02.PP, "OPEID")
setkey(MERGED2002.03.PP, "OPEID")
setkey(MERGED2003.04.PP, "OPEID")
setkey(MERGED2004.05.PP, "OPEID")
setkey(MERGED2005.06.PP, "OPEID")
setkey(MERGED2006.07.PP, "OPEID")
setkey(MERGED2007.08.PP, "OPEID")
setkey(MERGED2008.09.PP, "OPEID")
setkey(MERGED2009.10.PP, "OPEID")
setkey(MERGED2010.11.PP, "OPEID")
setkey(MERGED2011.12.PP, "OPEID")
setkey(MERGED2012.13.PP, "OPEID")
setkey(MERGED2013.14.PP, "OPEID")
setkey(MERGED2014.15.PP, "OPEID")
setkey(MERGED2015.16.PP, "OPEID")
# Add an `ay` column to every yearly table, one statement per table.
# deparse(substitute(X)) is used to obtain the table's own name as text;
# gsub() strips the dots and substr(., 9, 12) keeps characters 9-12 of the
# result, e.g. "MERGED1996.97.PP" -> "MERGED199697PP" -> "9697".
MERGED1996.97.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED1996.97.PP))), 9,12)]
MERGED1997.98.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED1997.98.PP))), 9,12)]
MERGED1998.99.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED1998.99.PP))), 9,12)]
MERGED1999.00.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED1999.00.PP))), 9,12)]
MERGED2000.01.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED2000.01.PP))), 9,12)]
MERGED2001.02.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED2001.02.PP))), 9,12)]
MERGED2002.03.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED2002.03.PP))), 9,12)]
MERGED2003.04.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED2003.04.PP))), 9,12)]
MERGED2004.05.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED2004.05.PP))), 9,12)]
MERGED2005.06.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED2005.06.PP))), 9,12)]
MERGED2006.07.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED2006.07.PP))), 9,12)]
MERGED2007.08.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED2007.08.PP))), 9,12)]
MERGED2008.09.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED2008.09.PP))), 9,12)]
MERGED2009.10.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED2009.10.PP))), 9,12)]
MERGED2010.11.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED2010.11.PP))), 9,12)]
MERGED2011.12.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED2011.12.PP))), 9,12)]
MERGED2012.13.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED2012.13.PP))), 9,12)]
MERGED2013.14.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED2013.14.PP))), 9,12)]
MERGED2014.15.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED2014.15.PP))), 9,12)]
MERGED2015.16.PP[, ay :=substr(gsub("\\.","",deparse(substitute(MERGED2015.16.PP))), 9,12)]
chinsoon12 给出的代码效果很好。现在，我还需要更改列名。我添加了
colnames(df)[1] <- "UNITID"
到以下代码中：
# For each table name in dfLs: fetch the table, coerce OPEID to character,
# key it on OPEID, rename the first column, and add the `ay` column
# (characters 9-12 of the name with the dots removed).
lapply(dfLs, function(nm) {
df <- get(nm)
setDT(df)[, OPEID := as.character(OPEID)]
setkeyv(df, "OPEID")
# NOTE(review): `colnames(df)[1] <- ...` is a replacement call that rebinds the
# local `df`; it presumably operates on a copy, so the table stored under `nm`
# keeps its old column name (and the `:=` below would then hit the copy too).
# data.table::setnames(df, 1L, "UNITID") renames by reference - confirm.
colnames(df)[1] <- "UNITID"
df[, ay := substr(gsub("\\.", "", nm), 9, 12)]
})
，但第一列的名称并没有被修改。任何建议都将不胜感激。
谢谢。
答案 0 :(得分:0)
以下代码可行：
# Demo: keep the data.tables in a named list and apply each step to every
# element, instead of writing one statement per table.
library(data.table, quietly = TRUE)
library(ggplot2, quietly = TRUE) # provides the `diamonds` example data
library(tictoc, quietly = TRUE)

set.seed(28)
nobv <- 1000 # number of observations per data.table

# Build five data.tables, each holding `nobv` rows of `diamonds` drawn at
# random with replacement.
l <- lapply(seq_len(5), function(z) {
  ind <- sample(x = seq_len(nrow(diamonds)), size = nobv, replace = TRUE)
  as.data.table(diamonds[ind, ])
})

# Name all the data.tables
names(l) <- LETTERS[seq_along(l)]

# Change cols_to_change to character. `:=` updates each table by reference,
# so the elements of `l` change in place; invisible() only suppresses the
# console printing of lapply()'s return value.
cols_to_change <- c("carat", "z")
invisible(lapply(l, function(z) {
  z[, (cols_to_change) := lapply(.SD, as.character), .SDcols = cols_to_change]
}))

# Set the key for all data.tables (setkeyv() also works by reference)
key_by_cols <- c("carat", "z")
invisible(lapply(l, function(z) {
  setkeyv(z, key_by_cols)
}))

# Add a name column to each data.table. `:=` adds the column by reference,
# which avoids the global assignment (`<<-`) and the copy made by `$<-`.
invisible(lapply(names(l), function(nm) {
  l[[nm]][, dtname := nm]
}))
答案 1 :(得分:0)
您或许可以这样做：
# Names of the 20 yearly tables to process.
dfLs <- c("MERGED1996.97.PP","MERGED1997.98.PP","MERGED1998.99.PP","MERGED1999.00.PP",
"MERGED2000.01.PP","MERGED2001.02.PP","MERGED2002.03.PP","MERGED2003.04.PP",
"MERGED2004.05.PP","MERGED2005.06.PP","MERGED2006.07.PP","MERGED2007.08.PP",
"MERGED2008.09.PP","MERGED2009.10.PP","MERGED2010.11.PP","MERGED2011.12.PP",
"MERGED2012.13.PP","MERGED2013.14.PP","MERGED2014.15.PP","MERGED2015.16.PP")
library(data.table)
# For each table: convert to data.table, coerce OPEID to character, key on
# OPEID, and add `ay` = characters 9-12 of the name with the dots removed
# (e.g. "MERGED1996.97.PP" -> "9697").
# invisible() suppresses console printing of the 20 returned tables.
invisible(lapply(dfLs, function(nm) {
  df <- get(nm, envir = globalenv())
  setDT(df)[, OPEID := as.character(OPEID)]
  setkeyv(df, "OPEID")
  df[, ay := substr(gsub("\\.", "", nm), 9, 12)]
  # setDT() rebinds the converted table to the local name `df`; when the input
  # was a plain data.frame the by-reference updates above may not reach the
  # original object, so store the result back under its name explicitly.
  assign(nm, df, envir = globalenv())
}))
答案 2 :(得分:0)
如果我理解正确,OP必须处理会计数据,这些数据包含在20个单独的数据框中,每个会计年度一个。
OP要求减少执行某些操作所需的代码量。
如果所有数据框确实具有相同的结构（列的数量、名称、类型和顺序），则建议将这些数据合并到一个大的数据对象中。这将大大简化后续的所有操作，包括分组和拆分。
library(data.table)
# Build the 20 table names "MERGED1996.97.PP" ... "MERGED2015.16.PP";
# sprintf() is vectorised, so the start years can be passed in one call
df_names <- sprintf("MERGED%4i.%02i.PP", 1996:2015, (1996:2015 + 1) %% 100)
# Alternative: collect the names from the objects in the global environment
df_names <- grep("^MERGED.*PP$", ls(), value = TRUE)
# Stack all tables into a single data.table; `idcol` prepends a column
# (df_name) holding the name of the table each row came from
big <- rbindlist(mget(df_names), idcol = "df_name")
big
             df_name     V1  OPEID         V3
              <char> <fctr>  <num>      <num>
 1: MERGED1996.97.PP      C 199601  0.2774292
 2: MERGED1996.97.PP      P 199602  1.0844412
 3: MERGED1997.98.PP      A 199701  0.4291247
 4: MERGED1997.98.PP      F 199702  0.5060559
 5: MERGED1998.99.PP      H 199801 -0.5466319
 6: MERGED1998.99.PP      X 199802 -0.5644520
---
35: MERGED2013.14.PP      H 201301 -1.1088896
36: MERGED2013.14.PP      F 201302 -1.0149620
37: MERGED2014.15.PP      L 201401  0.5630558
38: MERGED2014.15.PP      A 201402  1.6478175
39: MERGED2015.16.PP      F 201501  1.6059096
40: MERGED2015.16.PP      W 201502 -1.1578085
现在我们可以执行所要求的操作：
# change type of OPEID column (updated by reference via `:=`)
big[, OPEID := as.character(OPEID)]
# rename second column (used to be first column before rbindlist());
# the object is `big` - R is case-sensitive, so `BIG` would not be found
setnames(big, 2L, "UNITID")
# append column with accounting year id: characters 9-12 of the table name
# with the dots removed, e.g. "MERGED1996.97.PP" -> "9697"
big[, ay := substr(gsub("\\.", "", df_name), 9, 12)]
# the trailing [] makes the table print after the by-reference updates
big[]
             df_name UNITID  OPEID         V3     ay
              <char> <fctr>  <num>      <num> <char>
 1: MERGED1996.97.PP      C 199601  0.2774292   9697
 2: MERGED1996.97.PP      P 199602  1.0844412   9697
 3: MERGED1997.98.PP      A 199701  0.4291247   9798
 4: MERGED1997.98.PP      F 199702  0.5060559   9798
 5: MERGED1998.99.PP      H 199801 -0.5466319   9899
 6: MERGED1998.99.PP      X 199802 -0.5644520   9899
---
35: MERGED2013.14.PP      H 201301 -1.1088896   1314
36: MERGED2013.14.PP      F 201302 -1.0149620   1314
37: MERGED2014.15.PP      L 201401  0.5630558   1415
38: MERGED2014.15.PP      A 201402  1.6478175   1415
39: MERGED2015.16.PP      F 201501  1.6059096   1516
40: MERGED2015.16.PP      W 201502 -1.1578085   1516
请注意，我没有再调用 setkey()：由于有了 on = 语法和辅助索引，联接已不再需要它。
警告:以下代码将在全局环境中创建20个数据框。
# Create 20 small example data frames, MERGED1996.97.PP ... MERGED2015.16.PP,
# in the current environment; the fixed seed makes the random columns
# reproducible.
set.seed(1234L)
for (yr in 1996:2015) {
  # name carries the start year and the two-digit following year (1999 -> "00")
  nm <- sprintf("MERGED%4i.%02i.PP", yr, (yr + 1L) %% 100L)
  # two rows per table: random letters, OPEID = <year>01 / <year>02, noise
  tmp <- data.frame(
    V1 = sample(LETTERS, size = 2L),
    OPEID = 100 * yr + seq_len(2L),
    V3 = rnorm(n = 2L)
  )
  assign(x = nm, value = tmp)
}
# list the objects that now exist
ls()
 [1] "big"              "df_names"         "MERGED1996.97.PP" "MERGED1997.98.PP" "MERGED1998.99.PP" "MERGED1999.00.PP"
 [7] "MERGED2000.01.PP" "MERGED2001.02.PP" "MERGED2002.03.PP" "MERGED2003.04.PP" "MERGED2004.05.PP" "MERGED2005.06.PP"
[13] "MERGED2006.07.PP" "MERGED2007.08.PP" "MERGED2008.09.PP" "MERGED2009.10.PP" "MERGED2010.11.PP" "MERGED2011.12.PP"
[19] "MERGED2012.13.PP" "MERGED2013.14.PP" "MERGED2014.15.PP" "MERGED2015.16.PP" "nm"               "tmp"
[25] "yr"