我正在编写一些代码来绘制CUSUM图。我不能使用R必须绘制这些图形的(少数)包,因为随着时间的推移,我的边界会定期更新(即在1个图形中边界发生变化)。我目前的问题是如何提取向量首先穿过边界的时间点。边界是数据框中的变量。
另外,当矢量向零(最佳)线移动时,我对交叉边界时不太感兴趣。所以我猜有两个步骤,找到矢量穿过每个边界的所有点,然后选择我最感兴趣的点。
下面是一个边框发生变化的组成示例:
set.seed(1235)
df <- data.frame(Run=c(1:21), y = cumsum(c(0, rnorm(20, 0, 5))))
df$zero <- 0
df$LL1 <- -3
df$LL2 <- -8
df$UL1 <- 6
df$UL2 <- 14
df[c("zero","LL1", "LL2", "UL1", "UL2")][c(11:21),] <- df[c("zero","LL1", "LL2", "UL1", "UL2")][c(11:21),]+14
数据如下所示:
> df
Run y zero LL1 LL2 UL1 UL2
1 1 0.000000 0 -3 -8 6 14
2 2 -3.489940 0 -3 -8 6 14
3 3 -9.914209 0 -3 -8 6 14
4 4 -4.964414 0 -3 -8 6 14
5 5 -4.405535 0 -3 -8 6 14
6 6 -3.834496 0 -3 -8 6 14
7 7 4.656486 0 -3 -8 6 14
8 8 4.895714 0 -3 -8 6 14
9 9 8.170026 0 -3 -8 6 14
10 10 14.996445 0 -3 -8 6 14
11 11 17.009310 14 11 6 20 28
12 12 12.307479 14 11 6 20 28
13 13 17.732298 14 11 6 20 28
14 14 13.981514 14 11 6 20 28
15 15 11.873050 14 11 6 20 28
16 16 7.757170 14 11 6 20 28
17 17 9.820672 14 11 6 20 28
18 18 12.887160 14 11 6 20 28
19 19 10.617464 14 11 6 20 28
20 20 7.286322 14 11 6 20 28
21 21 8.869055 14 11 6 20 28
可以通过下面的代码绘制。上图是实际数据。第二个图表突出显示了我想要提取的点。
par(mfrow=c(2,1))
par(mar=c(3,3,2,3))
plot(df$Run, df$y, type="b", ylim=c(-10, 30), pch=19, cex=2, lwd=2)
abline(v=c(1:21), lty=3, col="grey")
points(df$Run, df$zero, lwd=2, col="darkgreen", type="l")
points(df$Run, df$LL1, type="l", col="orange")
points(df$Run, df$LL2, type="l", col="red")
points(df$Run, df$UL1, type="l", col="orange")
points(df$Run, df$UL2, type="l", col="red")
par(xpd=T)
text(rep(22, 5), c(14,11,6,20,28), c("zero", "LL1", "LL2", "UL1", "UL2"), pos=4, col=c("darkgreen", "orange", "red", "orange", "red"))
par(xpd=F)
plot(df$Run, df$y, type="b", ylim=c(-10, 30), pch=19, cex=2, lwd=2)
abline(v=c(1:21), lty=3, col="grey")
points(df$Run, df$zero, lwd=2, col="darkgreen", type="l")
points(df$Run, df$LL1, type="l", col="orange")
points(df$Run, df$LL2, type="l", col="red")
points(df$Run, df$UL1, type="l", col="orange")
points(df$Run, df$UL2, type="l", col="red")
flags <- data.frame(boundary.crossed=c("LL1", "LL2", "UL1", "UL2", "LL1", "LL1"),col=c("orange", "red", "orange", "red", "orange", "orange"), Run=c(2,3,9,10, 16, 19))
points(df$Run[flags$Run], df$y[flags$Run], cex=2, col=as.character(flags$col), pch=19)
par(xpd=T)
text(rep(22, 5), c(14,11,6,20,28), c("zero", "LL1", "LL2", "UL1", "UL2"), pos=4, col=c("darkgreen", "orange", "red", "orange", "red"))
par(xpd=F)
欢迎任何建议!
此致 LUC
答案 0 :(得分:1)
以下是基础R解决方案:
BoundaryCross <- function(myDf, keepSeparate=TRUE) {
y <- myDf$y
L2 <- myDf$LL2
U2 <- myDf$UL2
L1 <- myDf$LL1
U1 <- myDf$UL1
LimitTwo <- c(which(diff(y > L2)==-1L), which(diff(y > U2)==1L))+1L
LimitOne <- c(which(diff(y > L1)==-1L), which(diff(y > U1)==1L))+1L
## do you won't to differentiate between the boundary crosses???
if (keepSeparate) {
list(YellowIndices=sort(LimitOne), RedIndices=sort(LimitTwo))
} else {
sort(c(LimitOne, LimitTwo))
}
}
打电话给我们,我们获得:
BoundaryCross(df)
$YellowIndices
[1] 2 9 16 19
$RedIndices
[1] 3 10
如果您只是想集体识别指数,我们有:
BoundaryCross(df, FALSE)
[1] 2 3 9 10 16 19