热图,其值在R中

时间:2016-06-14 19:03:11

标签: r ggplot2 heatmap

我想在每周一天的时间段内在R中绘制车辆编号的热图。我的数据有点不同(至少对我而言)。计数车辆不是在特定时间,而是在一段时间内。请参阅以下内容:

Day begintime   endTime count  
Wednesday   730 1030    10  
Wednesday   830 1115    18  
Wednesday   900 1150    4  
Wednesday   935 1120    33  
Wednesday   1035    1230    7    
Wednesday   1040    1340    45  
Wednesday   1145    1240    43  
Wednesday   1145    1435    17  
Wednesday   1250    1345    55  
Wednesday   1300    1500    53  
Wednesday   1330    1450    27  
Wednesday   1355    1535    65  
Wednesday   1500    1555    63  
Wednesday   1500    1730    1  
Wednesday   1600    1700    30  
Wednesday   1630    1725    4  
Wednesday   1630    1915    42  
Wednesday   1730    1830    11  
Wednesday   1730    2115    3  
Wednesday   1800    1950    6  
Wednesday   1830    1925    13  
Tuesday 700 1100    1  
Tuesday 830 1030    30  
Tuesday 900 1100    4  
Tuesday 935 1100    42  
Tuesday 1030    1230    11  
Tuesday 1040    1320    196  
Tuesday 1130    1330    52  
Tuesday 1145    1430    21  
Tuesday 1245    1535    206  
Tuesday 1300    1430    62  
Tuesday 1325    1610    31  
Tuesday 1355    1525    216  
Tuesday 1430    1600    72  
Tuesday 1500    1700    1  
Tuesday 1530    1720    39  
Tuesday 1605    1755    8  
Tuesday 1630    1850    193  
Tuesday 1730    1825    5  
Tuesday 1730    2110    10  
Tuesday 1800    1920    21  
Tuesday 1800    2150    104  
Thursday    800 925 101  
Thursday    830 1130    2  
Thursday    900 1200    13  
Thursday    935 1125    21  
Thursday    1035    1235    7  
Thursday    1100    1250    36  
Thursday    1145    1310    31  
Thursday    1145    1445    17  
Thursday    1250    1415    46  
Thursday    1300    1600    41  
Thursday    1330    1455    27  
Thursday    1355    1545    56  
Thursday    1500    1600    51  
Thursday    1500    1745    10  
Thursday    1600    1720    18  
Thursday    1630    1750    4  
Thursday    1700    1820    33  
Thursday    1730    1855    7  
Thursday    1730    2120    190  
Thursday    1800    2020    2  
Thursday    1830    1930    6  
Monday  700 755 1  
Monday  830 1020    39  
Monday  900 1030    8  
Monday  935 1055    193  
Monday  935 1235    5  
Monday  1040    1240    9  
Monday  1130    1300    203  
Monday  1145    1405    15  
Monday  1230    1430    19  
Monday  1255    1545    213  
Monday  1325    1515    25  
Monday  1355    1520    29  
Monday  1415    1540    223  
Monday  1500    1650    35  
Monday  1530    1700    190  
Monday  1605    1730    2  
Monday  1630    1830    6  
Monday  1700    2000    9  
Monday  1730    2030    101  
Monday  1800    1850    2  
Monday  1800    2140    13  
Friday  800 950 10  
Friday  830 1210    21  
Friday  930 1100    104  
Friday  935 1135    5  
Friday  1040    1135    16  
Friday  1100    1400    24  
Friday  1145    1325    15  
Friday  1145    1700    26  
Friday  1250    1420    34  
Friday  1310    1435    25  
Friday  1330    1520    36  
Friday  1355    1555    44  
Friday  1500    1620    35  
Friday  1500    1800    101  
Friday  1600    1725    2  
Friday  1630    1755    13  
Friday  1700    1850    21  
Friday  1730    1920    1  
Friday  1730    2130    39  
Friday  1800    2045    8  
Friday  1830    2115    193 

实际上,我只能在begintimeendTime上绘制带有可变计数的热图。这是我的代码:

y <- read.csv("traffic.csv", row.names=1)


--to change the time format
y$begintime <- strptime(y$begintime, format="%H:%M")

p <- ggplot(y, aes(y=begintime,x=Day))
p + geom_tile(aes(fill=count)) + scale_fill_gradient(low="white",    high="red") + xlab("") + ylab("")+  scale_x_discrete(limits=c('Monday','Tuesday','Wednesday','Thursday','Friday','Saturday'))

我可以在热图中表达时间段(同时使用begintimeendTime)而不是仅显示一个时间段吗?

3 个答案:

答案 0 :(得分:1)

这个可能是可视化的一种方式。我把每天分成不连续的半小时时间段并计算(希望正确)表中每一行对该时间段的贡献,假设每个时间间隔的计数分布均匀。

enter image description here

library(ggplot2)
library(reshape2)

overlap <- function(s1, e1, s2, e2) {
  max(0, min(e1,e2)-max(s1,s2))
}
m2txt <- function(m) {
  sprintf("%d:%02d", trunc(m/60), m%%60)
}

y <- read.csv("traffic.csv")

y$begin <- 60*trunc(y$begintime/100) + (y$begintime %% 100)
y$end <- 60*trunc(y$endTime/100) + (y$endTime %% 100)
y$minutes <- y$end-y$begin
y$perMinute <- y$count/y$minutes

binwidth <- 30
days <- c('Monday','Tuesday','Wednesday','Thursday','Friday')
y$Day <- factor(y$Day, levels=days)
times <- seq(0,24*60-1,binwidth)

map <- data.frame(
  day=c(sapply(days, function(i) rep(i,24*60/binwidth))),
  time=rep(times, length(days))
)
map$count = mapply(map$day, map$time, FUN=function(day,time) {
  contrib <- mapply(y$begin, y$end, FUN=function(b, e) overlap(time, time+binwidth, b, e))
  contrib <- contrib * (as.character(y$Day)==day) * y$perMinute
  sum(contrib)
})
times <- sort(unique(subset(map, count>0)$time))

p <- ggplot(map, aes(day, time))
p + geom_tile(aes(fill=count)) + scale_fill_gradient(low="white", high="red") + xlab("") + ylab("") +
  scale_x_discrete(limits=days) + scale_y_continuous(breaks=seq(0,24*60,60), labels=seq(0,24,1)) + 
  coord_cartesian(ylim=c(min(times),max(times)))

答案 1 :(得分:1)

另一种方法是覆盖范围,并根据范围计数/天最大值使用填充上的alpha(最大值是给定日期计数总和的最大值)。您可以为所述alpha策略选择不同的算法:

library(dplyr)
library(ggplot2)

max_count <- max(count(df, Day, wt=count)$n)

df <- mutate(df, fill=count/max_count)
df <- mutate(df, Day=factor(Day, levels=rev(c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday"))))

gg <- ggplot()
gg <- gg + geom_segment(data=df, color="steelblue", size=12,
                        aes(y=Day, yend=Day, x=begintime, xend=endTime, alpha=fill))
gg <- gg + scale_x_continuous(expand=c(0,0), 
                              breaks=seq(700,2200,100),
                              labels=sprintf("%02d", seq(7, 22, 1)),
                              limits=c(700, 2200))
gg <- gg + scale_y_discrete()
gg <- gg + labs(x=NULL, y=NULL,
                title="Counting (steelblue) Cars",
                subtitle="Counting periods were non-uniform. This heatmap uses scales the time range color\nby the fraction of the max car count (total by day).")
gg <- gg + theme_minimal()
gg <- gg + theme(panel.grid.major.y=element_blank())
gg <- gg + theme(panel.grid.minor.x=element_blank())
gg <- gg + theme(legend.position="none")
gg <- gg + theme(axis.text.x=element_text(hjust=c(0, rep(0.5, 14), 1), size=8))
gg <- gg + theme(plot.title=element_text(face="bold"))
gg

enter image description here

有一种方法可以在此处获得“正确”标记的缩放颜色条(正确地在引号中,因为您无论如何都要对时间内切片计数的分布做出广泛的假设,因此准确性通常是有点的在窗外),但这对读者来说是一种练习。

答案 2 :(得分:0)

另一个选择是以更离散的方式显示实际间隔:

library(dplyr)
library(ggplot2)

max_count <- max(count(df, Day, wt=count)$n)

df <- mutate(df, fill=count/max_count)
df <- mutate(df, Day=factor(Day, levels=rev(c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday"))))
df <- mutate(df, id=1:nrow(df))
df <- arrange(df, begintime)
df <- mutate(df, id=factor(id, levels=rev(df$id)))

gg <- ggplot()
gg <- gg + geom_segment(data=df, size=0.75,
                        aes(y=id, yend=id, x=begintime, xend=endTime,
                            color=count))
gg <- gg + scale_x_continuous(expand=c(0,0), 
                              breaks=seq(700, 2200, 100),
                              labels=sprintf("%02d", seq(7, 22, 1)),
                              limits=c(700, 2200))
gg <- gg + scale_y_discrete()
gg <- gg + scale_color_distiller(name="# cars", palette="RdYlBu")
gg <- gg + facet_wrap(~Day, ncol=1, scales="free_y", switch="y")
gg <- gg + labs(x=NULL, y=NULL,
                title="Counting Cars",
                subtitle="Counting periods were non-uniform. This heatmap scales the time range color by the interval count")
gg <- gg + theme_minimal()
gg <- gg + theme(panel.grid.major.y=element_blank())
gg <- gg + theme(panel.grid.minor.x=element_blank())
gg <- gg + theme(legend.position="bottom")
gg <- gg + theme(axis.text.x=element_text(hjust=c(0, rep(0.5, 14), 1), size=8))
gg <- gg + theme(axis.text.y=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold"))
gg <- gg + theme(strip.text.y=element_text(angle=180))
gg <- gg + theme(panel.margin=margin(t=0, b=0))
gg

enter image description here

这样可以更容易地拥有一个诚实的传奇,并显示计数间隔的多样性。

我不习惯使用间隔平均值,因为我怀疑实际的间隔分布比您考虑的更重要,因此请随意修改以满足您的需求。我也不做生活的交通规划,所以你所做的可能是百分之百酷: - )