ggplot group by fill and show mean

时间:2017-11-29 05:05:52

标签: r ggplot2

我正在制作热图并按照本教程进行操作:

https://www.r-graph-gallery.com/283-the-hourly-heatmap/

为了省去点击链接的麻烦,我把教程中的代码块转载如下:

library(ggplot2)
library(dplyr) # easier data wrangling 
library(viridis) # colour blind friendly palette, works in B&W also
library(Interpol.T) #  will generate a large dataset on initial load
library(lubridate) # for easy date manipulation
library(ggExtra) # because remembering ggplot theme options is beyond me
library(tidyr) 


# Load the Trentino hourly temperature data shipped with Interpol.T.
# data() is called for its side effect of creating h_d_t in the workspace;
# it returns only the dataset name, so the result is not assigned.
data(Trentino_hourly_T, package = "Interpol.T")

names(h_d_t)[1:5] <- c("stationid", "date", "hour", "temp", "flag")

# Keep a single station; as_tibble() replaces the deprecated tbl_df().
df <- as_tibble(h_d_t) %>%
  filter(stationid == "T0001")

# Derive year, month (as a labelled factor) and day of month from the date.
df <- df %>%
  mutate(
    year = year(date),
    month = month(date, label = TRUE),
    day = day(date)
  )

df$date <- ymd(df$date) # not necessary for plot but
# useful if you want to do further work with the data

# cleanup: drop the raw table and the other objects that are no longer needed
# (presumably all created by the data() call above)
rm(list = c(
  "h_d_t", "mo_bias", "Tn", "Tx",
  "Th_int_list", "calibration_l",
  "calibration_shape", "Tm_list"
))


# create plotting df
df <- df %>%
  select(stationid, day, hour, month, year, temp)

然后制作热图:

# Hourly temperature heatmap: one tile per (day, hour) filled by temp,
# one panel per year (rows) and month (columns). The y axis is reversed so
# that hours increase from top to bottom, with a break at every hour present.
p <- ggplot(data = df, mapping = aes(x = day, y = hour, fill = temp)) +
  geom_tile(color = "white", size = 0.1) +
  scale_fill_viridis(name = "Hrly Temps C", option = "C") +
  facet_grid(year ~ month) +
  scale_y_continuous(trans = "reverse", breaks = unique(df$hour))

到这里为止,我都能复现。然而,我自己的数据集是会话(访问)级别的网站访问数据,因此同一天、同一小时内会有多条访问记录。除了访问次数,我还有一个timeOnPage指标。

以下数据样本,带有dput。

我想用热图展示每小时的平均访问次数或平均timeOnPage。以下是我的尝试。

我的数据样本:

> dput(sam)
structure(list(Day = structure(c(4L, 4L, 4L, 5L, 3L, 2L, 3L, 
6L, 2L, 2L, 4L, 2L, 3L, 3L, 6L, 1L, 4L, 2L, 3L, 5L, 2L, 5L, 4L, 
2L, 5L, 2L, 7L, 5L, 6L, 2L, 2L, 6L, 4L, 6L, 2L, 2L, 2L, 5L, 5L, 
2L, 6L, 5L, 3L, 5L, 3L, 2L, 6L, 4L, 2L, 5L, 2L, 5L, 4L, 2L, 6L, 
2L, 7L, 2L, 2L, 2L, 5L, 6L, 3L, 2L, 3L, 4L, 4L, 3L, 6L, 2L, 5L, 
3L, 4L, 4L, 3L, 2L, 5L, 5L, 5L, 3L, 5L, 2L, 4L, 5L, 5L, 2L, 3L, 
6L, 2L, 2L, 5L, 4L, 6L, 7L, 3L, 3L, 4L, 4L, 2L, 6L), .Label = c("Sun", 
"Mon", "Tues", "Wed", "Thurs", "Fri", "Sat"), class = c("ordered", 
"factor")), Hour = c(18L, 7L, 3L, 22L, 11L, 11L, 9L, 16L, 16L, 
13L, 18L, 18L, 10L, 19L, 7L, 13L, 18L, 14L, 10L, 20L, 17L, 6L, 
21L, 15L, 18L, 7L, 12L, 10L, 16L, 14L, 18L, 13L, 17L, 10L, 19L, 
20L, 14L, 16L, 10L, 9L, 16L, 9L, 8L, 13L, 17L, 17L, 11L, 15L, 
22L, 17L, 18L, 17L, 7L, 19L, 12L, 2L, 12L, 15L, 7L, 17L, 17L, 
18L, 13L, 10L, 19L, 9L, 13L, 13L, 17L, 21L, 23L, 4L, 17L, 12L, 
12L, 9L, 17L, 19L, 7L, 4L, 5L, 17L, 6L, 23L, 3L, 14L, 19L, 13L, 
7L, 11L, 9L, 13L, 9L, 19L, 11L, 5L, 20L, 20L, 19L, 11L), sessionID = c("1508980591045.l027p6mt", 
"1510155616668.57i2wj1", "1510140439620.qu19kyo", "1510296404412.xasqfwqd10v1qdtl6jemi", 
"1510082622485.szj2ja1e", "1511204933263.mq9bvi0d", "1511285142249.vp2fyfd9", 
"1510965282725.x04h1dko", "1508801295434.e056cpef", "1508790369346.ly63bjgr", 
"1509585154520.3usd036k", "1511834881064.e6f5evp", "1509471114265.2u807dwo", 
"1507688054076.9dls0jk", "1509721031589.ho125mpb", "1510521845178.99j1ibkr", 
"1510194555297.ioepfjgr", "1508793469455.hkc3xwa8", "1511288175700.62n5oc5", 
"1510287319653.7ye9sjc", "1511227016523.yyn1of99", "1511448209341.1u5vir5p", 
"1510205972493.qvu4ev7o", "1510615247987.swxhwct", "1508463701266.p52sdjzp", 
"1510588449881.d6ffruv9", "1507404213416.rovwmmge", "1510857718956.2z57w2vr", 
"1510360661780.19hznp3m78pvi", "1511820500742.48cyvo2a", "1508809029952.up0wqq5h", 
"1508533120441.gdvhacjr7jswiquwuyp66r", "1509583258224.j8krac0sz5kx8pxohl4n29", 
"1511549442901.5vm7na1l", "1508811367845.7b36epqk", "1509421407861.om0ydylt", 
"1508794534361.p3gcoa0e", "1510877729807.viad220f", "1511460355269.omwvd00l", 
"1508775703610.usuk2akm", "1510964376869.7e2crw9d", "1510247098808.np9ia23", 
"1508860753512.3z4182b", "1510868797935.3nmpvkri", "1510105270807.4evhpys", 
"1511831565084.27izf13f", "1510340973580.l9qj5drou5wmi", "1508364715184.14l4ikj", 
"1509426566404.9qnp0m3", "1510275972333.hhqu0exc", "1510625679744.jk3vvt1v", 
"1510881839700.c34skful", "1511365134270.57thqyir", "1509416741055.1f2cnmrp", 
"1509738404263.8ajwpij", "1510570338116.h9a5j88", "1511640706961.qw8q1eh", 
"1510011913201.eqd54kw", "1508769010911.wrpb329", "1508803518777.56b2ej2l", 
"1509670743316.yhncp17j", "1511576965410.y47g0wgj", "1508876390209.wem8i3lh", 
"1508779846415.hyx8qar", "1511322782502.s835px9", "1509554323957.osxgi0em", 
"1510176829762.jncm9xwb", "1509482328620.sqdbob0u", "1508545652936.a5hqcmp1fw29", 
"1508817816447.6mbdldxb", "1510297785623.33i6yhko", "1508843299131.3m26sqf5", 
"1510191633431.cl5fh9ik", "1509565114633.bd5yrkf5", "1510690660714.818yxn5o", 
"1507567660773.ybpbfgn", "1509667501973.1a9f9pyp", "1509674601865.yqvmcclv", 
"1511450423709.s149r25q", "1511267096892.n5u1d0nv", "1509624499459.u57lgtt8", 
"1510019204298.ka4w9kfh", "1511362131909.t26h6ig", "1510904968660.eowoea2q", 
"1510225256391.4dk073ej", "1510006654569.reo2eili", "1509501692686.ng48bwnz", 
"1509741958143.bxbf325r", "1508770633217.33ymrfgc", "1511810438817.zcgpr6vj", 
"1510852180447.wywsj7f", "1510176833767.nev0iaec", "1509727547082.53van2sr", 
"1507430914148.niu297m", "1508868705810.akd7r18h", "1510060231388.mz9ojf6g", 
"1509592760232.qtrlxye8", "1509592651211.1r82ucw4", "1508812928318.f3st4004", 
"1509734102140.leol1dnw"), uniquePageviews = c(1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), timeOnPage = c(359, 149, 69, 146, 147, 119, 168, 69, 29, 0, 
1542, 148, 242, 49, 457, 175, 175, 97, 79, 12, 0, 1141, 150, 
236, 74, 128, 23, 147, 172, 223, 225, 88, 69, 156, 0, 49, 110, 
150, 70, 123, 30, 145, 1629, 1, 119, 169, 48, 136, 529, 130, 
149, 124, 281, 2483, 0, 60, 149, 50, 29, 124, 149, 0, 92, 149, 
915, 47, 50, 89, 143, 84, 129, 147, 138, 80, 33, 226, 70, 146, 
177, 98, 150, 32, 148, 149, 12, 338, 146, 204, 149, 148, 26, 
149, 1110, 148, 23, 151, 0, 100, 0, 28)), row.names = c(20219L, 
42612L, 42149L, 46707L, 40122L, 57449L, 60878L, 56707L, 11725L, 
10102L, 29911L, 71743L, 25952L, 1492L, 35570L, 48411L, 43917L, 
10530L, 61004L, 46446L, 58846L, 65695L, 44287L, 49341L, 2999L, 
48502L, 627L, 54118L, 48148L, 70166L, 13346L, 4770L, 29745L, 
67979L, 13832L, 24814L, 10692L, 54744L, 65995L, 8216L, 56683L, 
44920L, 18121L, 54499L, 41155L, 71353L, 47606L, 1900L, 25023L, 
45811L, 49937L, 54904L, 63607L, 24571L, 36060L, 48479L, 69086L, 
37708L, 7353L, 12117L, 33912L, 68752L, 19081L, 8768L, 62647L, 
28317L, 43172L, 26286L, 6359L, 14907L, 46733L, 16418L, 43797L, 
28637L, 51671L, 1273L, 33677L, 34226L, 65759L, 60247L, 31739L, 
38171L, 63497L, 55589L, 44462L, 37454L, 27141L, 36178L, 7543L, 
69636L, 54030L, 43173L, 35743L, 852L, 18784L, 39283L, 30672L, 
30663L, 14142L, 35933L), class = "data.frame", .Names = c("Day", 
"Hour", "sessionID", "uniquePageviews", "timeOnPage"))

看起来像这样:

> head(sam)
        Day Hour                           sessionID uniquePageviews timeOnPage
20219   Wed   18              1508980591045.l027p6mt               1        359
42612   Wed    7               1510155616668.57i2wj1               1        149
42149   Wed    3               1510140439620.qu19kyo               1         69
46707 Thurs   22 1510296404412.xasqfwqd10v1qdtl6jemi               1        146
40122  Tues   11              1510082622485.szj2ja1e               1        147
57449   Mon   11              1511204933263.mq9bvi0d               1        119
> glimpse(sam)
Observations: 100
Variables: 5
$ Day             <ord> Wed, Wed, Wed, Thurs, Tues, Mon, Tues, Fri, Mon, Mon, Wed, Mon, Tues, Tues, Fri, Sun, Wed, M...
$ Hour            <int> 18, 7, 3, 22, 11, 11, 9, 16, 16, 13, 18, 18, 10, 19, 7, 13, 18, 14, 10, 20, 17, 6, 21, 15, 1...
$ sessionID       <chr> "1508980591045.l027p6mt", "1510155616668.57i2wj1", "1510140439620.qu19kyo", "1510296404412.x...
$ uniquePageviews <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
$ timeOnPage      <dbl> 359, 149, 69, 146, 147, 119, 168, 69, 29, 0, 1542, 148, 242, 49, 457, 175, 175, 97, 79, 12, ...

度量标准uniquePageviews始终为1或0,在热图中看起来效果不好。由于这是会话级别的数据,每天/每小时会有多条记录。对于timeOnPage,我想用热图展示给定小时和星期几组合下的平均页面停留时间。

所以,据我所知,ggplot正在对所有值求和,而我想要的是均值 mean()。

我的初始代码块:

# Initial heatmap: one tile per Day/Hour pair, filled by uniquePageviews.
# The y axis is reversed so hours run top to bottom in ascending order.
# NOTE(review): the legend is titled "TimeOnPage" although the fill is
# uniquePageviews, and the axis breaks come from the tutorial data frame
# `df` (df$hour), not from `sam`.
p <- ggplot(
  data = sam,
  mapping = aes(x = Day, y = Hour, fill = uniquePageviews)
) +
  geom_tile(color = "white", size = 0.1) +
  scale_fill_viridis(name = "TimeOnPage", option = "C") +
  scale_y_continuous(trans = "reverse", breaks = unique(df$hour))

我尝试将其更改为此但结果看起来完全相同:

# Second attempt at the heatmap, asking for a mean summary.
# NOTE(review): stat / fun.y are handed to ggplot() itself rather than to a
# layer; the author reports the plot is unchanged, so they appear to have no
# effect here. The fill is still uniquePageviews and the axis breaks still
# come from the tutorial data frame `df`.
p <- ggplot(
  sam,
  aes(x = Day, y = Hour, fill = uniquePageviews),
  stat = "summary",
  fun.y = "mean"
) +
  geom_tile(color = "white", size = 0.1) +
  scale_fill_viridis(name = "Mean TimeOnPage", option = "C") +
  # order by hour of day going top to bottom asc
  scale_y_continuous(trans = "reverse", breaks = unique(df$hour))

我可以先用dplyr对数据帧sam做分组汇总(group by)之类的转换,但我不确定ggplot::geom_tile()本身是否就能处理这个问题?

如何使用ggplot创建热图,其中填充基于均值?此外,有人可以澄清它现在正在展示的内容吗?总和?

1 个答案:

答案 0 :(得分:0)

不确定我是否正确理解了您的问题,但您可以尝试以下操作:

library(tidyverse)
library(viridis)

# Average timeOnPage for every Day/Hour combination, then draw one tile per
# combination filled by that mean. `sam` is the session-level data frame
# shown in the question.
sam %>%
  group_by(Day, Hour) %>%
  summarise(Mean = mean(timeOnPage)) %>%
  ungroup() %>% # summarise() drops only the last grouping level
  ggplot(aes(x = Day, y = Hour, fill = Mean)) +
  geom_tile(color = "white", size = 0.1) +
  scale_fill_viridis(name = "TimeOnPage", option = "C")

这将计算每天和每小时的平均timeOnPage并将其绘制为热图。