我有一个CSV文件,其中每一行对应一个我感兴趣的(Java GC)事件。每行数据由一个亚秒级精度的时间戳(非等间隔)和若干变量组成。读入后的对象如下所示:
# Load the GC event log (one row per event).
gcdata <- read.table(
  "http://bernd.eckenfels.net/view/gc1001.ygc.csv",
  header = TRUE, sep = ",", dec = "."
)
# Reference instant; the numeric `Timestamp` values are added to it as seconds.
start <- as.POSIXct(strptime("2012-01-01 00:00:00",
                             format = "%Y-%m-%d %H:%M:%S"))
gcdata.date <- gcdata$Timestamp + start
# Keep columns 2..7 (drops the old date column) and put the POSIXct `date`
# column in front of them.
gcdata <- data.frame(date = gcdata.date, gcdata[, 2:7])
str(gcdata)
结果
'data.frame': 2997 obs. of 7 variables:
$ date : POSIXct, format: "2012-01-01 00:00:06" "2012-01-01 00:00:06" "2012-01-01 00:00:18" ...
$ Distance.s. : num 0 0.165 11.289 9.029 11.161 ...
$ YGUsedBefore.K.: int 1610619 20140726 20148325 20213304 20310849 20404772 20561918 21115577 21479211 21544930 ...
$ YGUsedAfter.K. : int 7990 15589 80568 178113 272036 429182 982841 1346475 1412181 1355412 ...
$ Promoted.K. : int 0 0 0 0 8226 937 65429 71166 62548 143638 ...
$ YGCapacity.K. : int 22649280 22649280 22649280 22649280 22649280 22649280 22649280 22649280 22649280 22649280 ...
$ Pause.s. : num 0.0379 0.022 0.0287 0.0509 0.109 ...
在这种情况下,我关心暂停时间(以秒为单位)。我想绘制一张图:按(挂钟)小时聚合,把每小时的平均值画成一条线,把2%到98%分位数之间的范围画成灰色走廊,并把每小时内的最大值画成红线。
我做了一些工作,但是使用q98函数很难看,不得不使用多行语句似乎很浪费,而且我不知道如何在q02和q98之间实现灰色区域:
# 2% quantile of `x` (lower edge of the 2%-98% corridor).
#   x    numeric vector.
#   ...  forwarded to quantile(), e.g. `na.rm = TRUE`.
# Returns the (named) quantile visibly.
q02 <- function(x, ...) {
  # 0.02, not 0.2: the function is meant to give the 2nd percentile.
  quantile(x, probs = 0.02, ...)
}
# 98% quantile of `x` (upper edge of the 2%-98% corridor).
#   x    numeric vector.
#   ...  forwarded to quantile(), e.g. `na.rm = TRUE`.
# Returns the (named) quantile visibly; the former trailing assignment made
# the result invisible and silently discarded `...`.
q98 <- function(x, ...) {
  quantile(x, probs = 0.98, ...)
}
# Assign each event to its wall-clock hour; droplevels() discards hour
# buckets that contain no event.
hours <- droplevels(cut(gcdata$date, breaks = "hours")) # can I have 2 hours?
# The hourly maximum opens the plot and fixes the y range for the overlays.
plot(aggregate(gcdata$Pause.s. ~ hours, data = gcdata, FUN = max),
     ylim = c(0, 2), col = "red", ylab = "Pause(s)", xlab = "Days") # Is always black?
# Overlays draw into the existing coordinate system. `ylim` is not passed to
# lines(): it is not a graphical parameter there and only triggers warnings.
lines(aggregate(gcdata$Pause.s. ~ hours, data = gcdata, FUN = q98), col = "green")
lines(aggregate(gcdata$Pause.s. ~ hours, data = gcdata, FUN = q02), col = "green")
lines(aggregate(gcdata$Pause.s. ~ hours, data = gcdata, FUN = mean), col = "blue")
这样生成的图表中,黑点为最大值,蓝线为每小时平均值,下限和上限(0.2 和 0.98 分位数)为绿线。我认为如果改成灰色走廊、把最大值画成(红色)虚线,并设法修正坐标轴标签,图会更易读。有什么建议?(数据文件见上面的链接)
答案 0 :(得分:4)
您可以试试 `polygon`。下面这段代码可以做到:
# Hourly summaries of the pause time. Every aggregate() result has the hour
# factor in column 1 and the statistic in column 2.
y98 <- aggregate(gcdata$Pause.s. ~ hours, data = gcdata, FUN = q98)
y02 <- aggregate(gcdata$Pause.s. ~ hours, data = gcdata, FUN = q02)
ymax <- aggregate(gcdata$Pause.s. ~ hours, data = gcdata, FUN = max)
ymin <- aggregate(gcdata$Pause.s. ~ hours, data = gcdata, FUN = min)
ymean <- aggregate(gcdata$Pause.s. ~ hours, data = gcdata, FUN = mean)

# Shared x axis (the hour factor) and the two lower/centre/upper matrices.
x <- ymean[, 1]
y1 <- cbind(y02[, 2], ymean[, 2], y98[, 2])   # quantile band
y2 <- cbind(ymin[, 2], ymean[, 2], ymax[, 2]) # min/max band

# Wide min/max band first, then the quantile band on top of it.
plotAreaCI(x, y2, ylim = c(0, 2), xlab = "time", ylab = "variable")
plotAreaCI(x, y1, ylim = c(0, 2), poly.col = "blue", add = TRUE)
或
# Same two layers, but with "pretty" x ticks and a thicker centre line on
# the overlay.
plotAreaCI(x, y2,
           ylim = c(0, 2), xlab = "time", ylab = "variable",
           nice.x = TRUE)
plotAreaCI(x, y1,
           ylim = c(0, 2), mean.lwd = 2, poly.col = "blue",
           add = TRUE)
其中函数 `plotAreaCI` 定义如下:
# Draw a centre line inside a shaded band (e.g. a mean with a quantile
# corridor around it).
#
# Arguments:
#   x          Positions along the x axis. A factor (such as the result of
#              cut()) is drawn at its integer codes and labelled by its values.
#   y          Matrix or data frame with at least three columns, in this
#              order: lower bound, centre line, upper bound.
#   add        If TRUE, draw into the current plot instead of starting one.
#   nice.x     If TRUE, use the tick positions from axTicks() instead of one
#              tick per element of a factor `x`.
#   xlim, ylim Axis limits; default to the range of `x` / of `y`.
#   mean.col, mean.lwd  Colour and width of the centre line.
#   poly.col, poly.lty  Fill colour and border line type of the band.
#   xlab, ylab, main    Titles handed to title().
#   ...        Passed on to plot.window().
#
# Returns NULL invisibly; called for its drawing side effect.
plotAreaCI <- function(x, y, add = FALSE, nice.x = FALSE,
                       xlim = NULL, ylim = NULL,
                       mean.col = "black", mean.lwd = 1.5,
                       poly.col = "gray", poly.lty = 3,
                       xlab = NULL, ylab = NULL, main = "",
                       ...) {
  stopifnot(NCOL(y) >= 3L)

  # is.factor() inspects `x` itself. isClass("factor", x) only asks whether a
  # class named "factor" is defined, so it was TRUE for every input.
  isFactorX <- is.factor(x)
  if (isFactorX) {
    x.label <- as.character(x)
    x.levels <- levels(x)
    x <- as.numeric(x)
  }

  if (is.null(xlim)) xlim <- range(x, na.rm = TRUE)
  if (is.null(ylim)) ylim <- range(y, na.rm = TRUE)

  # Band outline: along the lower bound, back along the upper bound, then
  # close on the very first lower-bound point.
  x.pol <- c(x, rev(x), x[1])
  y.pol <- c(y[, 1], rev(y[, 3]), y[, 1][1])

  if (!add) {
    plot.new()
    plot.window(xlim = xlim, ylim = ylim, ...)
    if (!nice.x && isFactorX) {
      # One labelled tick per element of the factor.
      axis(1, at = x, labels = x.label)
    } else {
      xticks <- axTicks(1)
      if (isFactorX) {
        # Keep only ticks that fall on a level code, so that each label sits
        # on the level it names.
        on.level <- abs(xticks - round(xticks)) < 1e-8 &
          xticks >= 1 & xticks <= length(x.levels)
        xticks <- round(xticks[on.level])
        axis(1, at = xticks, labels = x.levels[xticks])
      } else {
        axis(1)
      }
    }
    axis(2, las = 1)
    box()
    title(xlab = xlab, ylab = ylab, main = main)
  }

  polygon(x.pol, y.pol, col = poly.col, lty = poly.lty)
  lines(x, y[, 2], col = mean.col, lwd = mean.lwd)
  invisible(NULL)
}
答案 1 :(得分:1)
这是我用来绘制实验室分析物的时间变化(在这种情况下为收缩压)的代码:
# Quantiles of SBP per calendar quarter. The grouping key is
# year + 0.25 * quarter of the draw date.
# NOTE(review): year() and quarter() are not base R; presumably from
# lubridate -- confirm which package is attached.
SBP.qtr.mat <- aggregate(set1HLI$SBP,
    list(year(set1HLI$Drawdt) + 0.25 * quarter(set1HLI$Drawdt)),
    # `probs` spelled out in full instead of relying on partial matching of `prob`.
    quantile, probs = c(0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975), na.rm = TRUE)
# Column 1 holds the quarter key; `$x` is the matrix of quantiles with one
# column per requested probability.
matplot(SBP.qtr.mat[, 1], SBP.qtr.mat$x, type = "pl")
要把它改成适合你的问题应该不会太难……或者你可以发布一个可重现的例子供大家使用。这段代码把第10、25、50、75、90、95和97.5百分位数放在同一个data.frame中,再由matplot负责绘制这样的对象。
灰色区域?通常的做法是画一个多边形:先沿下边界走,在最右端“掉头”,再沿上边界返回,最后在左端闭合。`polygon` 的参数按 `x, y` 的形式给出。您可以把 `col` 参数设为“灰色”。
要得到“2小时”的序列(既可以与数据框合并,也可以作为 `cut.POSIXt` 的 breaks 参数),可以在 `seq.POSIXt` 中使用时间单位的倍数:
> seq(ISOdate(1910,1,1), ISOdate(1999,1,1), "10 years")
[1] "1910-01-01 12:00:00 GMT" "1920-01-01 12:00:00 GMT" "1930-01-01 12:00:00 GMT" "1940-01-01 12:00:00 GMT"
[5] "1950-01-01 12:00:00 GMT" "1960-01-01 12:00:00 GMT" "1970-01-01 12:00:00 GMT" "1980-01-01 12:00:00 GMT"
[9] "1990-01-01 12:00:00 GMT"
我没有在文档中看到相关说明,但 `cut.POSIXt` 也可以使用时间间隔的倍数:
> str( cut( seq(ISOdate(1910,1,1), ISOdate(1999,1,1), "years"), "10 years") )
Factor w/ 9 levels "1910-01-01","1920-01-01",..: 1 1 1 1 1 1 1 1 1 1 ...
> str( cut( seq(ISOdate(1910,1,1), ISOdate(1999,1,1), "years"), "5 years") )
Factor w/ 18 levels "1910-01-01","1915-01-01",..: 1 1 1 1 1 2 2 2 2 2 ...
答案 2 :(得分:1)
可以使用 `period.apply()` 函数和 `endpoints()` 函数来获取两小时的聚合。
所以在顶部我会使用
library(zoo) # for zoo objects
library(xts) # for period.apply
# Read the GC log (one row per event).
gcdata <- read.table(
  "http://bernd.eckenfels.net/view/gc1001.ygc.csv",
  header = TRUE, sep = ",", dec = "."
)
# Absolute event times: the numeric `Timestamp` values added to a fixed
# reference instant.
timestamps <- gcdata$Timestamp +
  as.POSIXct(strptime("2012-01-01 00:00:00", format = "%Y-%m-%d %H:%M:%S"))
# All columns except the first, ordered by event time.
gcdatazoo <- zoo(gcdata[-1], order.by = timestamps) # as zoo object
创建一个 `zoo` 对象。你的函数保持不变:
# Draw two nested bands with two lines on top.
#
# Arguments:
#   x          x positions, one per row of `y`.
#   y          Matrix with at least six columns. Columns 1 and 5 bound the
#              outer band, columns 2 and 4 the inner band; column 3 is drawn
#              as the blue line and column 6 as the red line.
#   col.poly1  Fill colour of the outer band.
#   col.poly2  Fill colour of the inner band.
#   ...        Passed on to plot() (e.g. ylim, ylab, main).
#
# Returns NULL invisibly; called for its drawing side effect.
plotAreaCorridor <- function(x, y, col.poly1 = "lightgray", col.poly2 = "gray", ...) {
  # Both bands share the same x outline: forward, back, close at the start.
  x.pol <- c(x, rev(x), x[1])

  # Empty plot that only establishes the coordinate system.
  plot(x, y[, 6] + 1, type = "n", ...)

  # Outer band between columns 1 and 5.
  outer.y <- c(y[, 1], rev(y[, 5]), y[, 1][1])
  polygon(x.pol, outer.y, col = col.poly1, lty = 0)

  # Inner band between columns 2 and 4. It closes on its own lower bound
  # (column 2); closing on column 1 added a stray vertex to the outline.
  inner.y <- c(y[, 2], rev(y[, 4]), y[, 2][1])
  polygon(x.pol, inner.y, col = col.poly2, lty = 0)

  lines(x, y[, 3], col = "blue") # median
  lines(x, y[, 6], col = "red")  # max
  invisible(NULL)
}
然后我们可以简化一下:
# Two-hourly quantiles (5/20/50/80/95/100 %) of the pause time.
agg <- period.apply(
  gcdatazoo[, "Pause.s."],                        # to which data
  INDEX = endpoints(gcdatazoo, "hours", k = 2),   # every 2 hours
  FUN = function(x) {                             # what fun.
    quantile(x, probs = c(5, 20, 50, 80, 95, 100) / 100)
  }
)
#v99 = q99(gcdata$Pause.s.)                       # what is q99 ?
v99 <- mean(agg[, 5])                             # mean of 95-th percentile?
plotAreaCorridor(
  index(agg),                                     # use time index as x axis
  coredata(agg),                                  # and matrix part of zoo object as data
  ylim = c(0, max(agg[, 5]) * 1.5),
  ylab = "Quantiles of GC events",
  main = "NewPar Collection Activity"
)
# Horizontal reference lines: overall median and the v99 value from above.
abline(h = median(gcdatazoo[, "Pause.s."]), col = "lightblue")
abline(h = v99, col = "grey")
labeltxt <- paste0("99%=", round(v99, digits = 3), "s n=", nrow(gcdatazoo))
text(x = index(agg)[20], y = 1.5 * v99, labeltxt, col = "grey", pos = 3) # or legend()
给出了
轴现在是自动的,仅显示工作日,因为跨度小于一周;这可以根据需要进行覆盖。
答案 3 :(得分:0)
我目前得到了以下脚本(还需要研究DWin那个更高级的答案)。它现在看起来已经比较接近我想要的效果,但代码仍然很难看(例如我不知道如何对齐标签,也不知道如何得到正确的x轴标签):
# Draw two nested bands with two lines on top.
#
# Arguments:
#   x          x positions, one per row of `y`.
#   y          Matrix with at least six columns. Columns 1 and 5 bound the
#              outer band, columns 2 and 4 the inner band; column 3 is drawn
#              as the blue line and column 6 as the red line.
#   col.poly1  Fill colour of the outer band.
#   col.poly2  Fill colour of the inner band.
#   ...        Passed on to plot() (e.g. ylim).
#
# Returns NULL invisibly; called for its drawing side effect.
plotAreaCorridor <- function(x, y, col.poly1 = "lightgray", col.poly2 = "gray", ...) {
  # Both bands share the same x outline: forward, back, close at the start.
  x.pol <- c(x, rev(x), x[1])

  plot(x, y[, 6] + 1, type = "n", ...) # ugly since type="n" does not work for factor

  # Outer band between columns 1 and 5.
  y.outer <- c(y[, 1], rev(y[, 5]), y[, 1][1])
  polygon(x.pol, y.outer, col = col.poly1, lty = 0)

  # Inner band between columns 2 and 4; it must close on column 2, its own
  # lower bound, not on column 1.
  y.inner <- c(y[, 2], rev(y[, 4]), y[, 2][1])
  polygon(x.pol, y.inner, col = col.poly2, lty = 0)

  lines(x, y[, 3], col = "blue") # median
  lines(x, y[, 6], col = "red")  # max
  invisible(NULL)
}
# Hourly quantiles (5/20/50/80/95/100 %) of the pause time.
pause <- gcdata$Pause.s.
hours <- droplevels(cut(gcdata$date, breaks = "hours")) # can I have 2 hours?
agg <- aggregate(
  pause ~ hours,
  FUN = quantile,
  probs = c(5, 20, 50, 80, 95, 100) / 100
)
# x: the hour factor; ys: matrix of quantiles, one column per probability.
x <- agg$hours
ys <- agg$pause
# 99% quantile of `x`.
#   x    numeric vector.
#   ...  forwarded to quantile(), e.g. `na.rm = TRUE`.
# Returns the (named) quantile visibly; the former trailing assignment made
# the result invisible and silently discarded `...`.
q99 <- function(x, ...) {
  quantile(x, probs = 0.99, ...)
}
# Overall reference values: 99% quantile and median of all pauses.
v99 <- q99(gcdata$Pause.s.)
vmed <- median(gcdata$Pause.s.)

# Corridor plot, then the two horizontal reference lines.
plotAreaCorridor(x, ys, ylim = c(0, v99 * 1.5))
abline(h = vmed, col = "lightblue")
abline(h = v99, col = "grey")

# Annotate the 99% line with its value and the number of events.
label <- paste0("99%=", round(v99, digits = 3), "s n=", length(gcdata$date))
text(x = 30, y = v99, label, col = "grey", pos = 3)
title("NewPar Collection Activity")