我期待:
复制此功能:
If year = current
columns(7,8,9) = column(6)
Else
If year = current + 1
columns(8,9,10) = column(7)
Else
If year = current + 2
columns(9,10,11) = column(8)
Else
If year = current + 3
columns(10,11,12) = column9)
End If
End If
End If
End If
到目前为止,我已经能够使用以下不整洁的代码创建静态解决方案:
tbl.scholar1<-tbl.scholar1%>%mutate_cond(cohort == currentAY, ay_1819=ay_1718, ay_1920=ay_1718, ay_2021=ay_1718)
tbl.scholar1<-tbl.scholar1%>%mutate_cond(cohort == currentAY+1, ay_1920=ay_1819, ay_2021=ay_1819, ay_2122=ay_1819)
tbl.scholar1<-tbl.scholar1%>%mutate_cond(cohort == currentAY+2, ay_2021=ay_1920, ay_2122=ay_1920, ay_2223=ay_1920)
tbl.scholar1<-tbl.scholar1%>%mutate_cond(cohort == currentAY+3, ay_2122=ay_2021, ay_2223=ay_2021, ay_2324=ay_2021)
在一些黑客攻击后,我写了一个函数,它将当前年份和列名称作为输入:
tbl.scholar1<-dup.DF(tbl.scholar1, currentYR, "ay_1718", "ay_2324")
功能代码如下所示
dup.DF <- function(df1, currAY, name1, name2) {
df1%>%mutate_cond(cohort == currAY, UQ(rlang::sym(name2)) := UQ(rlang::sym(name1))) #This works!!!!
}
所以我知道有一个更优雅的解决方案,使用data.table,purrr:map或dplyr将动态变量作为向量或列表,这样我就不必重复我的函数n次迭代带有for循环。
The input looks like this....
SYSDATE ID name cohort fundCode ay_1718 ay_1819 ay_1920 ay_2021 ay_2122 ay_2223 ay_2324 ay_2425
0005-11-20 000000000 "last0, first" 1718 316001 1 0 0 0 0 0 0 0
0005-11-20 000000000 "last0, first" 1718 316001 0 1 0 0 0 0 0 0
0005-11-20 000000000 "last0, first" 1718 316001 0 0 1 0 0 0 0 0
0005-11-20 000000000 "last0, first" 1718 316001 0 0 0 1 0 0 0 0
我的预期输出是......
SYSDATE ID name cohort fundCode ay_1718 ay_1819 ay_1920 ay_2021 ay_2122 ay_2223 ay_2324 ay_2425
0005-11-20 000000000 "last0, first" 1718 316001 1 1 1 1 0 0 0 0
0005-11-20 000000000 "last0, first" 1718 316001 0 1 1 1 1 0 0 0
0005-11-20 000000000 "last0, first" 1718 316001 0 0 1 1 1 1 0 0
0005-11-20 000000000 "last0, first" 1718 316001 0 0 0 1 1 1 1 0
答案 0 :(得分:0)
更新答案 - 在获得OP对要求的澄清后,我不得不改变方法。
st_pos <- 6 #concerned column's start position in the given dataframe
df_bkp <- df #data backup
#rename concerned columns as "ay_1718", "ay_1819" etc
names(df)[st_pos:ncol(df)] <- paste("ay", paste0(as.numeric(substr(min(df$year), 1, 2)) + 0:(ncol(df) - st_pos),
as.numeric(substr(min(df$year), 3, 4)) + 0:(ncol(df) - st_pos)),
sep="_")
#copy "year" column's value to the ensuing three columns
cols <- names(df)[st_pos:ncol(df)] #renamed columns
mapply(function(x, y)
df[df$year == x & df$ID == y, which(grepl(x, cols)) + (st_pos-1):(st_pos+2)] <<-
df[df$year == x & df$ID == y, which(grepl(x, cols)) + (st_pos-1)],
df$year, df$ID)
给出了
> df
SYSDATE ID name year fundCode ay_1718 ay_1819 ay_1920 ay_2021 ay_2122 ay_2223 ay_2324 ay_2425
1 0005-11-20 0 last0, first 1718 316001 700 700 700 700 0 0 0 0
2 0005-11-20 1 last1, first 1819 316002 0 60 60 60 60 0 0 0
3 0005-11-20 2 last2, first 1920 316003 0 0 50 50 50 50 0 0
4 0005-11-20 3 last3, first 2021 316004 0 0 0 400 400 400 400 0
示例数据(注意:我已将Y1, Y2 etc
的值1
略微修改为其他值以用于说明目的)
df <- structure(list(SYSDATE = c("0005-11-20", "0005-11-20", "0005-11-20",
"0005-11-20"), ID = 0:3, name = c("last0, first", "last1, first",
"last2, first", "last3, first"), year = c(1718L, 1819L, 1920L,
2021L), fundCode = 316001:316004, Y1 = c(700L, 0L, 0L, 0L), Y2 = c(0L,
60L, 0L, 0L), Y3 = c(0L, 0L, 50L, 0L), Y4 = c(0L, 0L, 0L, 400L
), Y5 = c(0L, 0L, 0L, 0L), Y6 = c(0L, 0L, 0L, 0L), Y7 = c(0L,
0L, 0L, 0L), Y8 = c(0L, 0L, 0L, 0L)), .Names = c("SYSDATE", "ID",
"name", "year", "fundCode", "Y1", "Y2", "Y3", "Y4", "Y5", "Y6",
"Y7", "Y8"), class = "data.frame", row.names = c(NA, -4L))
# SYSDATE ID name year fundCode Y1 Y2 Y3 Y4 Y5 Y6 Y7 Y8
#1 0005-11-20 0 last0, first 1718 316001 700 0 0 0 0 0 0 0
#2 0005-11-20 1 last1, first 1819 316002 0 60 0 0 0 0 0 0
#3 0005-11-20 2 last2, first 1920 316003 0 0 50 0 0 0 0 0
#4 0005-11-20 3 last3, first 2021 316004 0 0 0 400 0 0 0 0
答案 1 :(得分:0)
这里有两种方法可以将数据从宽格式整形为长格式。
melt()
/ dcast()
library(data.table)
long <- melt(setDT(inp)[, rn := .I], measure.vars = patterns("ay_"))
long[order(rn, variable), value := replace(value, which(value == 1L)[1L] + 1:3, 1L), by = rn]
dcast(long, rn + ... ~ variable)
rn SYSDATE ID name cohort fundCode ay_1718 ay_1819 ay_1920 ay_2021 ay_2122 ay_2223 ay_2324 ay_2425 1: 1 0005-11-20 0 last0, first 1718 316001 1 1 1 1 0 0 0 0 2: 2 0005-11-20 0 last0, first 1718 316001 0 1 1 1 1 0 0 0 3: 3 0005-11-20 0 last0, first 1718 316001 0 0 1 1 1 1 0 0 4: 4 0005-11-20 0 last0, first 1718 316001 0 0 0 1 1 1 1 0
gather()
/ spread()
library(tidyr)
library(dplyr)
inp %>%
group_by(rn = row_number()) %>%
gather(, , starts_with("ay_")) %>%
mutate(value = replace(value, which(value == 1L)[1L] + 1:3, 1L)) %>%
spread(key, value)
# A tibble: 4 x 14 # Groups: rn [4] SYSDATE ID name cohort fundCode rn ay_1718 ay_1819 ay_1920 ay_2021 ay_2122 ay_2223 ay_2324 ay_2425 <chr> <int> <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> 1 0005-11-20 0 last0, first 1718 316001 1 1 1 1 1 0 0 0 0 2 0005-11-20 0 last0, first 1718 316001 2 0 1 1 1 1 0 0 0 3 0005-11-20 0 last0, first 1718 316001 3 0 0 1 1 1 1 0 0 4 0005-11-20 0 last0, first 1718 316001 4 0 0 0 1 1 1 1 0
inp <- fread('
SYSDATE ID name cohort fundCode ay_1718 ay_1819 ay_1920 ay_2021 ay_2122 ay_2223 ay_2324 ay_2425
0005-11-20 000000000 "last0, first" 1718 316001 1 0 0 0 0 0 0 0
0005-11-20 000000000 "last0, first" 1718 316001 0 1 0 0 0 0 0 0
0005-11-20 000000000 "last0, first" 1718 316001 0 0 1 0 0 0 0 0
0005-11-20 000000000 "last0, first" 1718 316001 0 0 0 1 0 0 0 0')