使用 dplyr 从上一行减去一个值
我在用 dplyr 计算某一列当前行与上一行的差值时遇到了问题。
graph_data_frame1
# A tibble: 13 x 5
# Groups: Trade_Date [10]
Trade_Date B_S Lots_Total Avg_Price atr
<dttm> <chr> <dbl> <dbl> <dbl>
1 2017-12-11 00:00:00 B 88 3591 43.3
2 2017-12-11 00:00:00 S 88 3586 43.3
3 2017-12-08 00:00:00 B 176 3594 40.7
4 2017-12-08 00:00:00 S 176 3599. 40.7
5 2017-12-07 00:00:00 NA NA NA 40.3
6 2017-12-06 00:00:00 NA NA NA 41.8
7 2017-12-05 00:00:00 NA NA NA 40.3
8 2017-12-04 00:00:00 B 44 3563 39.5
9 2017-12-04 00:00:00 S 44 3569 39.5
10 2017-12-01 00:00:00 NA NA NA 42.7
11 2017-11-30 00:00:00 NA NA NA 46.2
12 2017-11-29 00:00:00 NA NA NA 47.2
13 2017-11-28 00:00:00 NA NA NA 47.3
# Attempt: order the rows newest-first and subtract the previous row's `atr`
# from the current row's `atr`, storing the result in `atr_diff`.
# NOTE: the tibble is grouped by Trade_Date here, so lag() and first() are
# evaluated within each Trade_Date group rather than across the whole table.
atr_diff_df <- graph_data_frame1 %>%
group_by(Trade_Date) %>%
arrange(desc(Trade_Date)) %>%
mutate(atr_diff = atr - lag(atr, default = first(atr)))
# Print the result.
atr_diff_df
# A tibble: 13 x 6
# Groups: Trade_Date [10]
Trade_Date B_S Lots_Total Avg_Price atr atr_diff
<dttm> <chr> <dbl> <dbl> <dbl> <dbl>
1 2017-12-11 00:00:00 B 88 3591 43.3 0
2 2017-12-11 00:00:00 S 88 3586 43.3 0
3 2017-12-08 00:00:00 B 176 3594 40.7 0
4 2017-12-08 00:00:00 S 176 3599. 40.7 0
5 2017-12-07 00:00:00 NA NA NA 40.3 0
6 2017-12-06 00:00:00 NA NA NA 41.8 0
7 2017-12-05 00:00:00 NA NA NA 40.3 0
8 2017-12-04 00:00:00 B 44 3563 39.5 0
9 2017-12-04 00:00:00 S 44 3569 39.5 0
10 2017-12-01 00:00:00 NA NA NA 42.7 0
11 2017-11-30 00:00:00 NA NA NA 46.2 0
12 2017-11-29 00:00:00 NA NA NA 47.2 0
13 2017-11-28 00:00:00 NA NA NA 47.3 0
graph_data_frame1
# A tibble: 13 x 6
# Groups: Trade_Date [10]
Trade_Date B_S Lots_Total Avg_Price atr atr_diff
<dttm> <chr> <dbl> <dbl> <dbl> <dbl>
1 2017-12-11 00:00:00 B 88 3591 43.3 0
2 2017-12-11 00:00:00 S 88 3586 43.3 2.61
3 2017-12-08 00:00:00 B 176 3594 40.7 0
4 2017-12-08 00:00:00 S 176 3599. 40.7 0.382
5 2017-12-07 00:00:00 NA NA NA 40.3 -1.53
6 2017-12-06 00:00:00 NA NA NA 41.8 1.55
7 2017-12-05 00:00:00 NA NA NA 40.3 0.754
8 2017-12-04 00:00:00 B 44 3563 39.5 0
9 2017-12-04 00:00:00 S 44 3569 39.5 -3.21
10 2017-12-01 00:00:00 NA NA NA 42.7 -3.47
11 2017-11-30 00:00:00 NA NA NA 46.2 -0.971
12 2017-11-29 00:00:00 NA NA NA 47.2 -0.118
13 2017-11-28 00:00:00 NA NA NA 47.3 NA
我可以用 base R 得到我想要的结果(即上面最后一个输出),但我希望用 dplyr 来实现。
# dput() of the example data: a 13-row tibble with columns
#   Trade_Date (POSIXct, tzone "UTC"), B_S (character),
#   Lots_Total, Avg_Price and atr (all double).
# Rows are ordered from the most recent Trade_Date to the oldest.
structure(list(Trade_Date = structure(c(1512950400, 1512950400,
1512691200, 1512691200, 1512604800, 1512518400, 1512432000, 1512345600,
1512345600, 1512086400, 1.512e+09, 1511913600, 1511827200), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), B_S = c("B", "S", "B", "S", NA, NA,
NA, "B", "S", NA, NA, NA, NA), Lots_Total = c(88, 88, 176, 176,
NA, NA, NA, 44, 44, NA, NA, NA, NA), Avg_Price = c(3591, 3586,
3594, 3598.85714285714, NA, NA, NA, 3563, 3569, NA, NA, NA, NA
), atr = c(43.2857142857143, 43.2857142857143, 40.6734693877551,
40.6734693877551, 40.2915451895044, 41.8213244481466, 40.2754209555542,
39.5217893904751, 39.5217893904751, 42.7329623346929, 46.1996820011654,
47.1711560009989, 47.2895622865705)), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -13L))
答案 0 :(得分:0)
主要问题与 `group_by()` 调用有关。查看您的数据(我对 `ungroup(dat)` 的输出做了 `dput()`),可以看到在由 `group_by(Trade_Date)` 定义的每个组中,变量 `atr` 的长度为 1 或 2;在长度为 2 的情况下,两个值是相同的。因此,当您在 `mutate()` 函数中调用 `lag(atr, default = first(atr))` 时:如果组内有两个值,它返回的两个值与该组中 `atr` 的对应值完全相同(identical);如果组内 `atr` 的长度等于 1,它返回的就是 `atr` 的第一个(也是唯一一个)值。这样,在做减法运算时,您总是在减去相同的值,结果显然是 `0`。
下面是对数据进行分组时得到的结果:
library(dplyr)
# Grouped case: with the data grouped by Trade_Date, lag() and first() are
# evaluated inside each group, so the lagged value always equals the current
# one and every difference is 0.
dat %>%
  group_by(Trade_Date) %>%
  arrange(desc(Trade_Date)) %>%
  mutate(
    first_term = atr,
    second_term = lag(atr, default = first(atr)),
    # Same value as atr - lag(atr, default = first(atr)).
    difference = first_term - second_term
  )
# # A tibble: 13 x 8
# # Groups: Trade_Date [10]
# Trade_Date B_S Lots_Total Avg_Price atr first_term second_term difference
# <dttm> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 2017-12-11 00:00:00 B 88 3591 43.3 43.3 43.3 0
# 2 2017-12-11 00:00:00 S 88 3586 43.3 43.3 43.3 0
# 3 2017-12-08 00:00:00 B 176 3594 40.7 40.7 40.7 0
# 4 2017-12-08 00:00:00 S 176 3599. 40.7 40.7 40.7 0
# 5 2017-12-07 00:00:00 NA NA NA 40.3 40.3 40.3 0
# 6 2017-12-06 00:00:00 NA NA NA 41.8 41.8 41.8 0
# 7 2017-12-05 00:00:00 NA NA NA 40.3 40.3 40.3 0
# 8 2017-12-04 00:00:00 B 44 3563 39.5 39.5 39.5 0
# 9 2017-12-04 00:00:00 S 44 3569 39.5 39.5 39.5 0
#10 2017-12-01 00:00:00 NA NA NA 42.7 42.7 42.7 0
#11 2017-11-30 00:00:00 NA NA NA 46.2 46.2 46.2 0
#12 2017-11-29 00:00:00 NA NA NA 47.2 47.2 47.2 0
#13 2017-11-28 00:00:00 NA NA NA 47.3 47.3 47.3 0
而不分组时的结果如下:
# Ungrouped case: lag() now looks at the previous row of the whole table,
# so the difference is non-zero wherever consecutive rows differ in atr.
dat %>%
  arrange(desc(Trade_Date)) %>%
  mutate(
    first_term = atr,
    second_term = lag(atr, default = first(atr)),
    # Same value as atr - lag(atr, default = first(atr)).
    difference = first_term - second_term
  )
# # A tibble: 13 x 8
# Trade_Date B_S Lots_Total Avg_Price atr first_term second_term difference
# <dttm> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 2017-12-11 00:00:00 B 88 3591 43.3 43.3 43.3 0
# 2 2017-12-11 00:00:00 S 88 3586 43.3 43.3 43.3 0
# 3 2017-12-08 00:00:00 B 176 3594 40.7 40.7 43.3 -2.61
# 4 2017-12-08 00:00:00 S 176 3599. 40.7 40.7 40.7 0
# 5 2017-12-07 00:00:00 NA NA NA 40.3 40.3 40.7 -0.382
# 6 2017-12-06 00:00:00 NA NA NA 41.8 41.8 40.3 1.53
# 7 2017-12-05 00:00:00 NA NA NA 40.3 40.3 41.8 -1.55
# 8 2017-12-04 00:00:00 B 44 3563 39.5 39.5 40.3 -0.754
# 9 2017-12-04 00:00:00 S 44 3569 39.5 39.5 39.5 0
#10 2017-12-01 00:00:00 NA NA NA 42.7 42.7 39.5 3.21
#11 2017-11-30 00:00:00 NA NA NA 46.2 46.2 42.7 3.47
#12 2017-11-29 00:00:00 NA NA NA 47.2 47.2 46.2 0.971
#13 2017-11-28 00:00:00 NA NA NA 47.3 47.3 47.2 0.118
可以看到,结果向量不再全为零。
我并不完全清楚您想对数据做哪种转换,以及为什么要这样做。不过,您可以通过以下方式获得所需的输出:
# Desired output: with rows ordered newest-first, subtract the NEXT row's atr
# (lead) from the current one. The last row has no next row, so it gets NA.
dat %>%
  arrange(desc(Trade_Date)) %>%
  mutate(
    first_term = atr,
    # Kept only for comparison with the lag()-based attempts; not used below.
    second_term = lag(atr, default = first(atr)),
    # Same value as atr - lead(atr).
    difference = first_term - lead(first_term)
  )
# # A tibble: 13 x 8
# Trade_Date B_S Lots_Total Avg_Price atr first_term second_term difference
# <dttm> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 2017-12-11 00:00:00 B 88 3591 43.3 43.3 43.3 0
# 2 2017-12-11 00:00:00 S 88 3586 43.3 43.3 43.3 2.61
# 3 2017-12-08 00:00:00 B 176 3594 40.7 40.7 43.3 0
# 4 2017-12-08 00:00:00 S 176 3599. 40.7 40.7 40.7 0.382
# 5 2017-12-07 00:00:00 NA NA NA 40.3 40.3 40.7 -1.53
# 6 2017-12-06 00:00:00 NA NA NA 41.8 41.8 40.3 1.55
# 7 2017-12-05 00:00:00 NA NA NA 40.3 40.3 41.8 0.754
# 8 2017-12-04 00:00:00 B 44 3563 39.5 39.5 40.3 0
# 9 2017-12-04 00:00:00 S 44 3569 39.5 39.5 39.5 -3.21
#10 2017-12-01 00:00:00 NA NA NA 42.7 42.7 39.5 -3.47
#11 2017-11-30 00:00:00 NA NA NA 46.2 46.2 42.7 -0.971
#12 2017-11-29 00:00:00 NA NA NA 47.2 47.2 46.2 -0.118
#13 2017-11-28 00:00:00 NA NA NA 47.3 47.3 47.2 NA