好吧,我有一个时间序列数据集(请参阅下面的虚拟数据)。当值 B 满足特定条件(> 100)时,我想对值 A 进行变换并创建新变量(Xi、Xii、Xiii);当 B < 100 时,我希望 Xi、Xii 等直接等于 A 的值。
我已经设法做到了这一点,但是当 B < 100 时,新变量中会出现 NA。有没有办法更高效地把这两种情况合并处理:
# Self-join df on B against its own rows with B > 100, bringing in their A
# value as X, then derive Xi / Xii / Xiii as A plus 10% / 50% / 90% of X.
# Rows whose B is not above 100 find no match in the join, so X -- and with it
# Xi, Xii and Xiii -- is NA for those rows.
df2 <- df %>%
  left_join(
    df %>%
      filter(B > 100) %>%
      select(B, X = A),
    by = "B"
  ) %>%
  mutate(
    Xi = A + X * 0.1,
    Xii = A + X * 0.5,
    Xiii = A + X * 0.9
  )
***编辑:输出应如下所示:
# Example of the result for the first six rows of df: a data.frame with the
# original columns (datetime as POSIXct with tzone "Canada/Mountain", A, B)
# plus the derived columns Xi, Xii and Xiii. The derived columns are NA in
# rows 3-6.
output <- structure(list(datetime = structure(c(1467354545, 1467358145,
1467361745, 1467365345, 1467368945, 1467372545), class = c("POSIXct",
"POSIXt"), tzone = "Canada/Mountain"), A = c(0.3896284155, 0.5047586155,
0.4988, 0.5978083335, 0.610775, 0.665325), B = c(74.41898788,
58.563069355, 43.227076165, 26.66528807, 13.70157303, 6.8983046415
), Xi = c(0.42859125705, 0.55523447705, NA, NA, NA, NA), Xii =
c(0.58444262325,
0.75713792325, NA, NA, NA, NA), Xiii = c(0.74029398945, 0.95904136945,
NA, NA, NA, NA)), row.names = c(NA, 6L), class = "data.frame")
但是在出现 NA 的位置,这些值应该等于 A。
### dummy data
# Reproducible input: a 48-row data.frame with three columns.
#   datetime - POSIXct timestamps (tzone "Canada/Mountain"), spaced 3600
#              seconds apart
#   A        - numeric values that the derived columns are built from
#   B        - numeric values compared against the 100 threshold
df<-structure(list(datetime = structure(c(1467354545, 1467358145,
1467361745, 1467365345, 1467368945, 1467372545, 1467376145, 1467379745,
1467383345, 1467386945, 1467390545, 1467394145, 1467397745, 1467401345,
1467404945, 1467408545, 1467412145, 1467415745, 1467419345, 1467422945,
1467426545, 1467430145, 1467433745, 1467437345, 1467440945, 1467444545,
1467448145, 1467451745, 1467455345, 1467458945, 1467462545, 1467466145,
1467469745, 1467473345, 1467476945, 1467480545, 1467484145, 1467487745,
1467491345, 1467494945, 1467498545, 1467502145, 1467505745, 1467509345,
1467512945, 1467516545, 1467520145, 1467523745), class = c("POSIXct",
"POSIXt"), tzone = "Canada/Mountain"), A = c(0.3896284155, 0.5047586155,
0.4988, 0.5978083335, 0.610775, 0.665325, 0.61445, 0.6224416665,
0.6195833335, 0.6156666665, 0.628616667, 0.594925, 0.592366667,
0.448325, 0.4093166665, 0.4076166665, 0.4199666665, 0.3724, 0.3550416665,
0.4245833335, 0.406518785, 0.2448416665, 0.2305666665, 0.325375,
0.3413166665, 0.409642232, 0.400575, 0.4452833335, 0.48665, 0.5067833335,
0.563375, 0.5323416665, 0.499816667, 0.49065, 0.480566667, 0.4535833335,
0.491475, 0.370733333, 0.2932916665, 0.307991667, 0.375575, 0.363816667,
0.220041667, 0.2227583335, 0.2400166665, 0.226133333, 0.25015,
0.286516667), B = c(74.41898788, 58.563069355, 43.227076165,
26.66528807, 13.70157303, 6.8983046415, 4.051383457, 3.9928000525,
6.6264775935, 11.56529787, 19.381280495, 39.294139195, 63.738904315,
87.15298014, 99.92137692, 123.905882, 132.52431235, 122.1168814,
112.8298307, 70.39181064, 68.67000666, 181.4195973, 179.78868045,
95.658796805, 70.15210006, 53.19819125, 57.79353269, 21.815294445,
17.00485432, 8.294689507, 6.159616564, 6.447905295, 11.06234652,
18.723238995, 34.42328062, 61.348978945, 71.989398775, 113.3879366,
124.0605001, 122.3875736, 101.9667852, 91.15826362, 192.1395999,
192.19658895, 180.7257523, 171.3751495, 157.8631188, 126.9319141
)), class = "data.frame", row.names = c(NA, -48L))
答案 0 :(得分:1)
library(dplyr)
# Derive Xi / Xii / Xiii from A in a single pass over df:
#   - rows with B > 100 get A plus 10% / 50% / 90% of A;
#   - all other rows keep A unchanged.
# No join is involved, so the derived columns are never NA as long as A and B
# themselves contain no NA (if_else() returns NA where its condition is NA).
df2 <-
  df %>%
  mutate(
    # x is a plain copy of A; wrapping it in a conditional that yields A in
    # both branches would be a no-op. It is kept as a column so the result
    # still exposes x alongside the derived variables.
    x = A,
    Xi = if_else(B > 100, x * 0.1 + A, A),
    Xii = if_else(B > 100, x * 0.5 + A, A),
    Xiii = if_else(B > 100, x * 0.9 + A, A)
  )
答案 1 :(得分:0)
除了可重复的示例外,您还应该提供预期的结果。
为什么要创建多个新变量?您是否打算稍后将它们合并到单个列中?如果是这样,现在就可以只创建一列。对于两种结果,可以使用 ifelse();对于两种以上的结果,可以使用 dplyr::case_when()。
library(dplyr)
# Label every Petal.Length value as "A" (<= 2), "B" (<= 5) or "C" (anything
# else), keep only the measurement and its label, and show 10 random rows.
# For each row case_when() uses the first condition that is TRUE, so the
# order of the clauses matters.
iris %>%
  select(Petal.Length) %>%
  mutate(
    new_petal_length = case_when(
      Petal.Length <= 2 ~ "A",
      Petal.Length <= 5 ~ "B",
      TRUE ~ "C"
    )
  ) %>%
  sample_n(10)
#> Petal.Length new_petal_length
#> 6.6 C
#> 3.9 B
#> 1.4 A
#> 4.5 B
#> 3.0 B
#> 1.4 A
#> 4.5 B
#> 5.0 B
#> 1.5 A
#> 5.5 C
请注意,对于每个元素,case_when() 采用第一个结果为 TRUE 的条件,这就是 Petal.Length <= 5 不会覆盖 Petal.Length <= 2 的原因。最后的 TRUE 是兜底条件:当前面所有条件都不为 TRUE 时,就使用它对应的值。