我有一个示例数据集
# Example data set: a data.frame with 100 rows (row names 2..101) and five
# columns -- TimeStamp (POSIXct, tzone "GMT", consecutive values 600 seconds
# apart), MeanWindSpeed, Direction, TurbInt and Temperature.
# NOTE(review): this looks like dput() output; units of the measurement
# columns are not stated here -- confirm before relying on them.
temp=structure(list(TimeStamp = structure(c(1360368000, 1360368600,
1360369200, 1360369800, 1360370400, 1360371000, 1360371600, 1360372200,
1360372800, 1360373400, 1360374000, 1360374600, 1360375200, 1360375800,
1360376400, 1360377000, 1360377600, 1360378200, 1360378800, 1360379400,
1360380000, 1360380600, 1360381200, 1360381800, 1360382400, 1360383000,
1360383600, 1360384200, 1360384800, 1360385400, 1360386000, 1360386600,
1360387200, 1360387800, 1360388400, 1360389000, 1360389600, 1360390200,
1360390800, 1360391400, 1360392000, 1360392600, 1360393200, 1360393800,
1360394400, 1360395000, 1360395600, 1360396200, 1360396800, 1360397400,
1360398000, 1360398600, 1360399200, 1360399800, 1360400400, 1360401000,
1360401600, 1360402200, 1360402800, 1360403400, 1360404000, 1360404600,
1360405200, 1360405800, 1360406400, 1360407000, 1360407600, 1360408200,
1360408800, 1360409400, 1360410000, 1360410600, 1360411200, 1360411800,
1360412400, 1360413000, 1360413600, 1360414200, 1360414800, 1360415400,
1360416000, 1360416600, 1360417200, 1360417800, 1360418400, 1360419000,
1360419600, 1360420200, 1360420800, 1360421400, 1360422000, 1360422600,
1360423200, 1360423800, 1360424400, 1360425000, 1360425600, 1360426200,
1360426800, 1360427400), class = c("POSIXct", "POSIXt"), tzone = "GMT"),
MeanWindSpeed = c(10, 10, 9.7, 9.8, 9.1, 9.1, 9.3, 9.3, 9.8,
9.8, 10.3, 10.4, 10.2, 11, 11.4, 12.1, 11.9, 11.5, 11.3,
11.1, 10.9, 11, 11.1, 11.1, 11.1, 11.5, 11.1, 11.1, 10.8,
10.7, 10.9, 11.3, 11.6, 11.1, 10.7, 10.2, 10, 9.5, 9.3, 9.2,
9.8, 10.4, 11.1, 11.5, 11.8, 11.5, 11.4, 11.1, 11.2, 10.8,
9.5, 9, 8.3, 8, 7.6, 8.8, 9.6, 10.7, 10.1, 9.7, 10.1, 10.1,
9.9, 9.6, 9.5, 9.3, 9.7, 9.3, 9.3, 9, 9.3, 8.9, 9.4, 9.8,
9.8, 9.2, 9.9, 9.3, 9.9, 10, 9.8, 9.2, 8.9, 8.4, 7.9, 8.2,
8.1, 8.3, 8.5, 8.6, 8.3, 8.8, 8.1, 8.4, 8, 8.5, 8.6, 8.8,
8.2, 8.7), Direction = c(19, 21, 21, 19, 18, 20, 22, 19,
19, 15, 13, 11, 8, 10, 11, 12, 14, 16, 17, 17, 17, 17, 18,
17, 19, 21, 20, 20, 18, 18, 16, 16, 17, 10, 12, 17, 19, 23,
22, 25, 25, 24, 25, 25, 25, 24, 23, 26, 26, 27, 30, 29, 29,
28, 29, 31, 31, 31, 31, 30, 33, 32, 32, 30, 31, 33, 33, 32,
31, 30, 32, 34, 37, 35, 34, 35, 30, 30, 27, 24, 25, 23, 26,
28, 24, 27, 27, 31, 21, 18, 16, 19, 24, 22, 21, 24, 26, 17,
20, 16), TurbInt = c(0.01, 0.02, 0.0309, 0.0204, 0.033, 0.022,
0.0323, 0.0215, 0.0204, 0.0204, 0.0194, 0.0192, 0.0196, 0.0182,
0.0175, 0.0165, 0.0168, 0.0087, 0.0177, 0.009, 0.0183, 0.0182,
0.018, 0.009, 0.018, 0.0348, 0.027, 0.018, 0.0185, 0.028,
0.0183, 0.0088, 0.0172, 0.018, 0.028, 0.0196, 0.04, 0.0316,
0.0215, 0.0217, 0.0204, 0.0288, 0.027, 0.0261, 0.0254, 0.0261,
0.0351, 0.027, 0.0268, 0.0278, 0.0421, 0.0556, 0.0602, 0.075,
0.0921, 0.1136, 0.0833, 0.0841, 0.0792, 0.0619, 0.0693, 0.0594,
0.0606, 0.0833, 0.0632, 0.0753, 0.0722, 0.0538, 0.086, 0.1111,
0.0645, 0.1011, 0.0745, 0.102, 0.0918, 0.0978, 0.0808, 0.086,
0.101, 0.1, 0.1122, 0.1087, 0.1011, 0.119, 0.1013, 0.122,
0.1481, 0.1325, 0.0941, 0.1163, 0.1084, 0.125, 0.1235, 0.119,
0.125, 0.1176, 0.1163, 0.0795, 0.122, 0.1034), Temperature = c(19.8,
19.5, 19.3, 19.3, 19.2, 19.1, 18.8, 18.7, 18.5, 18.3, 18.4,
18.1, 17.9, 17.8, 17.8, 17.9, 17.7, 17.6, 17.6, 17.4, 17.1,
17.1, 16.9, 16.9, 16.9, 16.9, 16.9, 16.7, 16.6, 16.6, 16.5,
16.2, 16.1, 16, 16, 15.8, 15.6, 15.3, 15.2, 15.3, 15.3, 15.3,
15, 14.8, 14.9, 14.9, 14.8, 14.8, 15, 15.6, 16, 16.5, 17.2,
17.9, 18.6, 19.3, 19.8, 20.1, 20.5, 21.1, 21.1, 21.4, 21.7,
22.2, 22.8, 23.3, 23.6, 23.8, 24.3, 24.9, 24.9, 25.5, 25.8,
26.2, 26.6, 27, 27.1, 27.5, 28.2, 28.4, 28.8, 28.9, 29.1,
29.5, 29.9, 29.9, 30, 30.2, 30.2, 30.5, 30.6, 30.6, 30.7,
30.8, 30.7, 30.7, 30.8, 31, 30.9, 30.9)), .Names = c("TimeStamp",
"MeanWindSpeed", "Direction", "TurbInt", "Temperature"), row.names = 2:101, class = "data.frame")
我选择风速和湍流强度数据并对风速进行分级:
# Keep the wind-speed and turbulence-intensity columns, assign every row to a
# 1-unit-wide wind-speed bin (edges at -0.5, 0.5, ..., 25.5; each bin labelled
# by its centre 0..25), drop rows containing NA, and leave the result grouped
# by bin.
dist_turb <- temp %>%
  dplyr::select(matches("MeanWindSpeed|TurbInt")) %>%
  dplyr::mutate(
    tibin = cut(
      MeanWindSpeed,
      breaks = seq(-0.5, 25.5, 1),
      labels = seq(0, 25, 1)
    )
  ) %>%
  na.omit() %>%
  dplyr::group_by(tibin)
要查看每个风速分箱中湍流强度的分布,我可以这样绘图:
# One kernel-density curve of TurbInt per wind-speed bin, drawn as lines and
# coloured by bin; adjust = 6 widens the default bandwidth sixfold.
p <- ggplot(dist_turb, aes(x = TurbInt, group = tibin, color = tibin)) +
  stat_density(geom = "line", position = "identity", adjust = 6)

# Convert the static ggplot into an interactive plotly widget and display it.
p <- ggplotly(p)
p
我怎样才能获得这些密度分布的数据?我尝试在dplyr管道中使用hist,但它会返回错误:
# Failing attempt, kept as-is to illustrate the question: the last mutate()
# tries to store hist(...)$density as a new column, but that vector has one
# element per histogram bin, which does not match the number of rows in each
# tibin group, so the call errors.
dist_turb= temp%>%
dplyr::select(matches("MeanWindSpeed|TurbInt")) %>%
dplyr::mutate(tibin = cut(MeanWindSpeed, breaks = seq(-0.5, 25.5, 1), labels = seq(0, 25, 1))) %>% na.omit() %>%
dplyr::group_by(tibin) %>%
dplyr::mutate(den = hist(TurbInt, breaks = 6,plot=FALSE)$density) %>% na.omit()
答案 0 :(得分:0)
问题在于:hist(...)$density 输出的元素数量(即直方图的分箱数,由 breaks = 6 决定;该值只是建议值,实际分箱数可能不同)与每组的行数不同,因此无法直接放进 mutate。各组的行数如下:
# Count the rows that fall into each wind-speed bin. These per-group row
# counts are what the length of hist(...)$density would have to match for a
# grouped mutate() to succeed.
temp %>%
  dplyr::select(matches("MeanWindSpeed|TurbInt")) %>%
  dplyr::mutate(
    tibin = cut(
      MeanWindSpeed,
      breaks = seq(-0.5, 25.5, 1),
      labels = seq(0, 25, 1)
    )
  ) %>%
  na.omit() %>%
  dplyr::group_by(tibin) %>%
  # Fixed namespace typo: was `dplry::summarise`, which fails because no
  # package named "dplry" exists.
  dplyr::summarise(n = dplyr::n())
# A tibble: 5 x 2
# tibin n
# <fctr> <int>
#1 8 15
#2 9 27
#3 10 27
#4 11 27
#5 12 4
一种方法是在summarise或mutate中把结果包装成list(即生成列表列):
# Per wind-speed bin, compute the histogram densities of TurbInt and store the
# whole density vector in a single list-column cell, so each group yields one
# row regardless of how many histogram bins hist() produced.
res1 <- temp %>%
  dplyr::select(matches("MeanWindSpeed|TurbInt")) %>%
  dplyr::mutate(
    tibin = cut(
      MeanWindSpeed,
      breaks = seq(-0.5, 25.5, 1),
      labels = seq(0, 25, 1)
    )
  ) %>%
  na.omit() %>%
  dplyr::group_by(tibin) %>%
  dplyr::summarise(
    den = list(hist(TurbInt, breaks = 6, plot = FALSE)[["density"]])
  )
如果我们想要以“长”格式展开该列表列,请使用unnest:
# Expand the `den` list-column into long format: one row per density value,
# with `tibin` repeated for every value of its group. The column is named
# explicitly because tidyr >= 1.0 warns when unnest() is called without it;
# passing it positionally also works with older tidyr releases.
res1 %>%
  tidyr::unnest(den)