我有以下数据表:
# Example data: one row per ID within each GROUP, read from inline
# pipe-delimited text. Only the first row of each group (ID_001) has EO_1 and
# EO_2 filled in; every other EO_* cell is left blank.
dt <- fread("
ID | EO_1 | EO_2 | EO_3 | GROUP
ID_001 | 0.5 | 1.2 | | A
ID_002 | | | | A
ID_003 | | | | A
ID_004 | | | | A
ID_001 | 0.4 | 2.5 | | B
ID_002 | | | | B
ID_003 | | | | B
ID_004 | | | | B
",
sep = "|",
# Column types, in order: ID, EO_1, EO_2, EO_3, GROUP.
colClasses = c("character", "numeric", "numeric", "numeric", "character"))
我正在尝试执行一些按行的操作,这些操作有时取决于前面几行中的数据。更具体地说:
# Lagged product: element i of the result is EO_1[i - 1] * EO_2[i - 1].
# Both inputs are moved down one position with shift(type = "lag"), so the
# first element is built from shift()'s fill value rather than from real data.
calc_EO_1 <- function(EO_1, EO_2) {
  prev_eo_1 <- shift(EO_1, type = "lag")
  prev_eo_2 <- shift(EO_2, type = "lag")
  prev_eo_1 * prev_eo_2
}
# Element i of the result is EO_1[i] * EO_2[i - 1] * EO_3[i - 1]: the current
# EO_1 multiplied by the lagged EO_2 and the lagged EO_3.
calc_EO_2 <- function(EO_1, EO_2, EO_3) {
  prev_eo_2 <- shift(EO_2, type = "lag")
  prev_eo_3 <- shift(EO_3, type = "lag")
  EO_1 * prev_eo_2 * prev_eo_3
}
# Element-wise product of EO_1 and EO_2 taken from the same row.
calc_EO_3 <- function(EO_1, EO_2) {
  EO_1 * EO_2
}
最后一个函数(calc_EO_3)需要先在第一行上计算,因为它只取决于同一行的其他字段(这应该很容易);之后,这三个操作必须按顺序逐行执行。
我目前最接近的尝试是:
# Row number of the first row of each GROUP (.I[1] evaluated per group).
first_row_bygroup_index <- dt[, .I[1], by = GROUP]$V1
# Step 1: on those first rows, derive EO_3 from the EO_1 and EO_2 already there.
dt[first_row_bygroup_index,
EO_3 := calc_EO_3(EO_1, EO_2)
]
# Step 2: attempt to fill all three columns on every remaining row.
# NOTE(review): grouping by the subset's row names appears to put each row in
# its own group, so the shift() calls inside calc_EO_1 / calc_EO_2 would have
# no previous row to read from -- consistent with the NA result reported for
# this attempt. Confirm before relying on this explanation.
dt[!first_row_bygroup_index,
`:=` (
EO_1 = calc_EO_1(EO_1, EO_2),
EO_2 = calc_EO_2(EO_1, EO_2, EO_3),
EO_3 = calc_EO_3(EO_1, EO_2)
),
by = row.names(dt[!first_row_bygroup_index])]
但它只能正确计算每组的第一行:
ID | EO_1 | EO_2 | EO_3 | GROUP
ID_001 | 0.5 | 1.2 | 0.6 | A
ID_002 | | | | A
ID_003 | | | | A
ID_004 | | | | A
ID_001 | 0.4 | 2.5 | 1.0 | B
ID_002 | | | | B
ID_003 | | | | B
ID_004 | | | | B
其中那些空白单元格的值均为 NA。
我认为我离解决方案不太远,但是我找不到使之可行的方法。问题在于:在对某个行子集执行操作时,我无法引用该子集之外的行。
编辑:我之前漏掉了预期的结果:
ID | EO_1 | EO_2 | EO_3 | GROUP
ID_001 | 0.50000000 | 1.20000000 | 0.60000000 | A
ID_002 | 0.60000000 | 0.43200000 | 0.25920000 | A
ID_003 | 0.25920000 | 0.02902376 | 0.00752296 | A
ID_004 | 0.00752296 | 0.00000164 | 0.00000001 | A
ID_001 | 0.40000000 | 2.50000000 | 1.00000000 | B
ID_002 | 1.00000000 | 2.50000000 | 2.50000000 | B
ID_003 | 2.50000000 | 15.62500000 | 39.06250000 | B
ID_004 | 39.06250000 | 23841.8580000 | 931322.57810000 | B
新的编辑:我想出了以下代码段,但我想再等一等,看看是否有人能给出比这种方法更高效的解决方案:
# Keep recomputing all three columns over the whole table until no NA is left.
# All right-hand sides are listed in the same order as the target columns:
# EO_3 first, then EO_1 and EO_2, whose "ID_001" rows keep their current
# values while every other row is recomputed from the row above.
repeat {
  if (!any(is.na(dt))) {
    break
  }
  dt[, c("EO_3", "EO_1", "EO_2") := list(
    calc_EO_3(EO_1, EO_2),
    ifelse(ID == "ID_001", EO_1, calc_EO_1(EO_1, EO_2)),
    ifelse(ID == "ID_001", EO_2, calc_EO_2(EO_1, EO_2, EO_3))
  )]
}
我已经提出了一个类似的dplyr解决方案,同时也提供了难看的while循环修复。关键是找到一种进行按行计算的方法,该方法可以从前一行获取信息,即使该行位于所选子集之外。我希望有人可以改善它,因此我将稍等一下,然后将其标记为解决方案。
答案 0 :(得分:2)
这是另一种可能的方法:
# Seed EO_3 on the rows where EO_1 is already present (in the example data
# these are the ID_001 rows, i.e. the first row of each group).
dt[!is.na(EO_1), EO_3 := EO_1 * EO_2, by=.(GROUP)]
# Overwrite the three EO columns on every row whose ID is not "ID_001" with
# the values produced by the nested query below.
dt[ID!="ID_001", c("EO_1", "EO_2", "EO_3") :=
dt[,
{
# Per-GROUP starting state, taken from the group's first row.
eo1 <- EO_1[1L]; eo2 <- EO_2[1L]; eo3 <- EO_3[1L]
# Walk the remaining rows of this group one ID at a time.
.SD[ID!="ID_001",
{
# Each value is derived from the running state; eo2 uses the eo1 that was
# just updated, and eo3 uses both freshly updated values.
# NOTE(review): this relies on eo1/eo2/eo3 carrying over from one by-group
# to the next inside this j expression -- confirm against data.table docs.
eo1 <- eo1 * eo2
eo2 <- eo1 * eo2 * eo3
eo3 <- eo1 * eo2
.(eo1, eo2, eo3)
},
by=.(ID)]
},
# Drop the first two result columns (presumably the GROUP and ID grouping
# columns) so that only the three value columns are assigned.
by=.(GROUP)][, -1L:-2L]
]
输出:
ID EO_1 EO_2 EO_3 GROUP
1: ID_001 0.50000000 1.200000e+00 6.000000e-01 A
2: ID_002 0.60000000 4.320000e-01 2.592000e-01 A
3: ID_003 0.25920000 2.902376e-02 7.522960e-03 A
4: ID_004 0.00752296 1.642598e-06 1.235720e-08 A
5: ID_001 0.40000000 2.500000e+00 1.000000e+00 B
6: ID_002 1.00000000 2.500000e+00 2.500000e+00 B
7: ID_003 2.50000000 1.562500e+01 3.906250e+01 B
8: ID_004 39.06250000 2.384186e+04 9.313226e+05 B
答案 1 :(得分:1)
您希望最终得到的数据是这样的吗?
# Build the EO_1 / EO_2 / EO_3 recurrence for a single group.
#
# Arguments:
#   x, y  Starting EO_1 and EO_2 values (the first row).
#   n     Number of rows to produce; a single number >= 1.
#
# Returns a data.table with columns EO_1, EO_2 and EO_3, rounded to 8 decimal
# places and ordered from the first row to the last.
go <- function(x, y, n) {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 1)
  z <- x * y
  # seq_len() gives an empty sequence for n == 1, whereas 1:(n - 1) would
  # count down through c(1, 0) and add two unwanted rows.
  for (i in seq_len(n - 1)) {
    # New values are prepended, so index 1 always holds the most recent row.
    x <- c(x[1] * y[1], x)
    # x[1] is already the new EO_1; y[1] and z[1] still belong to the
    # previous row.
    y <- c(x[1] * y[1] * z[1], y)
    z <- x * y
  }
  # Reverse into first-to-last order, then round every column.
  data.table(EO_1 = x, EO_2 = y, EO_3 = z)[.N:1][, lapply(.SD, round, 8)]
}
go(.5, 1.2, 4)
EO_1 EO_2 EO_3
1: 0.50000000 1.20000000 0.60000000
2: 0.60000000 0.43200000 0.25920000
3: 0.25920000 0.02902376 0.00752296
4: 0.00752296 0.00000164 0.00000001
答案 2 :(得分:1)
棘手的问题!我尝试使用 tidyr 中的 nest,并对每个分组应用一个自定义函数。
options("scipen"=999, "digits"=8)
library(tidyverse)
# Custom function: fill rows 2..n of one group's data frame, each from the
# row above it.
#
# .df must have columns EO_1, EO_2 and EO_3, with the first row already
# complete. Returns .df with the remaining rows filled in:
#   EO_1[i] = EO_1[i - 1] * EO_2[i - 1]
#   EO_2[i] = EO_1[i] * EO_2[i - 1] * EO_3[i - 1]
#   EO_3[i] = EO_1[i] * EO_2[i]
logic <- function(.df){
  # seq_len(...)[-1L] is empty for groups with zero or one row; 2:nrow(.df)
  # would instead iterate over c(2, 1) and index past the end of the data.
  for(i in seq_len(nrow(.df))[-1L]){
    .df[i, "EO_1"] <- .df[i-1, "EO_1"] * .df[i-1, "EO_2"]
    .df[i, "EO_2"] <- .df[i, "EO_1"] * .df[i-1, "EO_2"] * .df[i-1, "EO_3"]
    .df[i, "EO_3"] <- .df[i, "EO_1"] * .df[i, "EO_2"]
  }
  .df
}
# Answers the question: seed EO_3 from EO_1 and EO_2, run logic() on each
# GROUP's rows separately, then stitch the groups back together.
dt <- dt %>%
  mutate(EO_3 = EO_1 * EO_2) %>%
  # Name the list-column and the columns to unnest explicitly; the bare
  # nest(-GROUP) / unnest() forms are deprecated since tidyr 1.0.0.
  nest(data = -GROUP) %>%
  mutate(data = map(data, logic)) %>%
  unnest(cols = data)
# Fixing nice output: round the EO_* columns to 8 decimals, move GROUP to the
# last position, and print the result as a plain data.frame.
dt %>%
  # across() replaces the superseded mutate_at(vars(...), ...) form.
  mutate(across(contains("EO_"), ~ round(.x, digits = 8))) %>%
  relocate(GROUP, .after = last_col()) %>%
  as.data.frame()
结果如下:
ID EO_1 EO_2 EO_3 GROUP
1 ID_001 0.50000000 1.20000000 0.60000000 A
2 ID_002 0.60000000 0.43200000 0.25920000 A
3 ID_003 0.25920000 0.02902376 0.00752296 A
4 ID_004 0.00752296 0.00000164 0.00000001 A
5 ID_001 0.40000000 2.50000000 1.00000000 B
6 ID_002 1.00000000 2.50000000 2.50000000 B
7 ID_003 2.50000000 15.62500000 39.06250000 B
8 ID_004 39.06250000 23841.85791016 931322.57461548 B