子集化后ggplot2 y轴顺序发生变化

时间:2014-05-05 17:44:30

标签: r ggplot2 r-factor

我有一个按预期工作的函数,直到我将其子集化。 plotCalendar()函数是我使用ggplot2和facet在Calendar热图上的尝试。 y轴顺序很重要,因为它适用于" WeekOfMonth" - 当订单被撤销时,数据viz看起来不像日历。

代码如下,首先是调用代码,然后是生成一些数据的函数 - generateData(),然后是plot函数 - plotCalendar()

当我使用df作为数据时,代码按预期工作,但是当我使用df2(子集数据)时,WeekOfMonth的顺序沿y轴反转。

library(ggplot2)
library(ProgGUIinR)
library(chron)

df <- generateData()
plotCalendar(df, dateFieldName = "dates", numericFieldName = "counts", yLab = "Month of Year")
df2 <- df[df$filterField == 42, ]
plotCalendar(df2, dateFieldName = "dates", numericFieldName = "counts", yLab = "Month of Year")

这两个函数,一个用于生成测试数据,另一个用于绘制日历

generateData <- function()
{
      set.seed(42)
      dates <- seq(as.Date("2012/01/01"), as.Date("2012/6/30"), by = "1 day")
      counts <- 1:length(dates)
      filterField <- sample(1:42,length(dates),replace=T)
      df <- data.frame(dates, counts, filterField)

      return(df)
}


plotCalendar <- function(data, dateFieldName, numericFieldName, title = "Title", yLab = "Y Label", fillLab = "Fill Label", lowColor = "moccasin", highColor = "dodgerblue")
{

      agg <- aggregate(as.formula(paste(numericFieldName, "~", dateFieldName)), data, sum)

      names(agg)[names(agg) == dateFieldName] <- "DateField"
      names(agg)[names(agg) == numericFieldName] <- "NumericField"

      minMonth <- as.POSIXlt(min(agg$DateField))$mon + 1
      maxMonth <- as.POSIXlt(max(agg$DateField))$mon + 1

      minYear <- as.POSIXlt(min(agg$DateField))$year + 1900
      maxYear <- as.POSIXlt(max(agg$DateField))$year + 1900 

      minDate <- ISOdate(minYear, minMonth, 1)
      maxDate <- ISOdate(maxYear, maxMonth, 1)
      maxDateEndMonth <- as.POSIXlt(as.Date(seq(maxDate, length = 2, by = "1 month")[2]))
      daySeq <- seq(minDate, maxDateEndMonth, by = "1 day")

      daySeq <- as.data.frame(daySeq)
      names(daySeq) <- c("DateField")
      daySeq$DateField <- as.Date(daySeq$DateField)
      agg$DateField <- as.Date(agg$DateField)

      agg <- merge(daySeq, agg, by = "DateField", all.x = T)

      agg$Day <- as.numeric(days(agg$DateField))

      agg$Weekday <- weekdays(agg$DateField)
      agg$Weekday <- factor(agg$Weekday, levels = rev(c("Saturday", "Friday", "Thursday", "Wednesday", "Tuesday", "Monday", "Sunday")))

      agg$Month <- months(agg$DateField)
      agg$Month <- factor(agg$Month, levels = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"))

      agg$MonthNumber <- as.POSIXlt(agg$DateField)$mon + 1

      agg$Year <-  as.POSIXlt(agg$DateField)$year + 1900

      agg$WeekOfMonth <- 1 + week.of.month(agg$Year, agg$MonthNumber, agg$Day)
      agg$WeekOfMonth <- factor(agg$WeekOfMonth, levels = 6:1)

      #makeSpreadsheet(gActs, "Group Activities - Member Participation")

      View(agg)
      p <- ggplot(agg)
      p <- p + aes(Year, WeekOfMonth, fill = NumericField)

      noData <- subset(agg, is.na(agg$NumericField))

      p <- p + geom_tile(data = subset(agg, !is.na(agg$NumericField)), aes(fill = NumericField), color = "gray")
      if(nrow(noData) > 0)
      {
        p <- p + geom_tile(data = noData, color = "gray", fill = "white")
      }

      p <- p + geom_text(aes(label = paste(paste(rep(" ", 5), collapse = ""), Day)), vjust = 0, size = 3, colour = "black")
      p <- p + geom_text(data = subset(agg, !is.na(NumericField)), aes(label = NumericField), size = 4, vjust = 0.5, hjust = 1, color = 'black', fontface = "bold")
      p <- p + facet_grid(Month ~ Weekday) + scale_fill_gradient(low = lowColor, high = highColor)
      p <- p + labs(title = paste(title, "\n"), y = paste(yLab, "\n"), fill = fillLab)
      p <- p + theme(plot.title = element_text(size = 20, face="bold"),  
                     axis.title.x = element_blank(), 
                     axis.ticks.x = element_blank(),
                     axis.text.x = element_blank(),
                     axis.title.y = element_text(size = 16, face = "bold"), 
                     legend.title = element_text(size = 14, face = "bold"), 
                     legend.text = element_text(size = 11),
                     panel.grid.major = element_blank(),
                     panel.grid.minor = element_blank(),
                     strip.text = element_text(size = 14, face = "bold"))
      plot(p)
}

谢谢,

1 个答案:

答案 0 :(得分:6)

如果您颠倒to tile图层的顺序,它就会起作用。

电流:

p <- ggplot(agg, aes(Year, WeekOfMonth, fill = NumericField))
noData <- subset(agg, is.na(agg$NumericField))
p <- p + geom_tile(data = subset(agg, !is.na(agg$NumericField)), aes(fill = NumericField), color = "gray")
if(nrow(noData) > 0) p <- p + geom_tile(data = noData, color = "gray", fill = "white")

新:

p <- ggplot(agg,aes(Year, WeekOfMonth, fill = NumericField))  
noData <- subset(agg, is.na(agg$NumericField)) 
if(nrow(noData) > 0) p <- p + geom_tile(data = noData, color = "gray", fill = "white")
p <- p + geom_tile(data = subset(agg, !is.na(agg$NumericField)), aes(fill = NumericField), color = "gray")

我认为这个问题与ggplot对因素的处理有关,例如,agg$WeekOfMonth,这些因素缺失了。解决这个问题的一种方法是避免使agg$WeekOfMonth成为一个因素。

agg$WeekOfMonth <- 1 + week.of.month(agg$Year, agg$MonthNumber, agg$Day)
p <- ggplot(agg)
p <- p + aes(Year, -WeekOfMonth, fill = NumericField)  
noData <- subset(agg, is.na(agg$NumericField))
p <- p + geom_tile(data = subset(agg, !is.na(agg$NumericField)), aes(fill = NumericField), color = "gray")
if(nrow(noData) > 0)p <- p + geom_tile(data = noData, color = "gray", fill = "white")

要避免负y轴标签,您必须添加:

p <- p + scale_y_continuous(label=abs)

ggplot图层定义。这产生与上面相同的图,并且不需要反转图块层的顺序。

编辑找到更好的方法

通过使用na.value-... scale_fill_continuous(...)参数,您可以完全避免使用多个数据集。

p <- ggplot(agg)
p <- p + aes(Year, WeekOfMonth, fill = NumericField)
p <- p + geom_tile(aes(fill = NumericField), color = "gray")
p <- p + scale_fill_gradient(low = lowColor, high = highColor, na.value="white")

这完全避免了对noData的需求。

最后,我想你有理由以这种方式显示日历,但IMO在这里是一个更直观的日历视图。

gg.calendar <- function(df) {
  require(ggplot2)
  require(lubridate)
  wom <- function(date) { # week-of-month
    first <- wday(as.Date(paste(year(date),month(date),1,sep="-")))
    return((mday(date)+(first-2)) %/% 7+1)
  }
  df$month <- month(df$dates)
  df$day   <- mday(df$dates)

  rng   <- range(df$dates)
  rng   <- as.Date(paste(year(rng),month(rng),1,sep="-"))
  start <- rng[1]
  end   <- rng[2]
  month(end) <- month(end)+1
  day(end)   <- day(end)  -1

  cal <- data.frame(dates=seq(start,end,by="day"))
  cal$year  <- year(cal$date)
  cal$month <- month(cal$date)
  cal$cmonth<- month(cal$date,label=T)
  cal$day   <- mday(cal$date)
  cal$cdow  <- wday(cal$date,label=T)
  cal$dow   <- wday(cal$date)
  cal$week  <- wom(cal$date)

  cal        <- merge(cal,df[,c("dates","counts")],all.x=T)

  ggplot(cal, aes(x=cdow,y=-week))+
    geom_tile(aes(fill=counts,colour="grey50"))+
    geom_text(aes(label=day),size=3,colour="grey20")+
    facet_wrap(~cmonth, ncol=3)+
    scale_fill_gradient(low = "moccasin", high = "dodgerblue", na.value="white")+
    scale_color_manual(guide=F,values="grey50")+
    scale_x_discrete(labels=c("S","M","T","W","Th","F","S"))+
    theme(axis.text.y=element_blank(),axis.ticks.y=element_blank())+
    theme(panel.grid=element_blank())+
    labs(x="",y="")+
    coord_fixed()
}
gg.calendar(df)
gg.calendar(df2)