查找向量跨越边界的索引

时间:2016-09-05 02:13:07

标签: r data-manipulation

我正在编写一些代码来绘制CUSUM图。我不能使用R必须绘制这些图形的(少数)包,因为随着时间的推移,我的边界会定期更新(即在1个图形中边界发生变化)。我目前的问题是如何提取向量首先穿过边界的时间点。边界是数据框中的变量。

另外,当矢量向零(最佳)线移动时,我对交叉边界时不太感兴趣。所以我猜有两个步骤,找到矢量穿过每个边界的所有点,然后选择我最感兴趣的点。

下面是一个边框发生变化的组成示例:

set.seed(1235)
df <- data.frame(Run=c(1:21), y = cumsum(c(0, rnorm(20, 0, 5))))
df$zero <- 0
df$LL1  <- -3
df$LL2  <- -8
df$UL1  <-  6
df$UL2  <- 14

df[c("zero","LL1", "LL2", "UL1", "UL2")][c(11:21),] <- df[c("zero","LL1", "LL2", "UL1", "UL2")][c(11:21),]+14

数据如下所示:

> df
   Run         y zero LL1 LL2 UL1 UL2
1    1  0.000000    0  -3  -8   6  14
2    2 -3.489940    0  -3  -8   6  14
3    3 -9.914209    0  -3  -8   6  14
4    4 -4.964414    0  -3  -8   6  14
5    5 -4.405535    0  -3  -8   6  14
6    6 -3.834496    0  -3  -8   6  14
7    7  4.656486    0  -3  -8   6  14
8    8  4.895714    0  -3  -8   6  14
9    9  8.170026    0  -3  -8   6  14
10  10 14.996445    0  -3  -8   6  14
11  11 17.009310   14  11   6  20  28
12  12 12.307479   14  11   6  20  28
13  13 17.732298   14  11   6  20  28
14  14 13.981514   14  11   6  20  28
15  15 11.873050   14  11   6  20  28
16  16  7.757170   14  11   6  20  28
17  17  9.820672   14  11   6  20  28
18  18 12.887160   14  11   6  20  28
19  19 10.617464   14  11   6  20  28
20  20  7.286322   14  11   6  20  28
21  21  8.869055   14  11   6  20  28

可以通过下面的代码绘制。上图是实际数据。第二个图表突出显示了我想要提取的点。

par(mfrow=c(2,1))
par(mar=c(3,3,2,3))

plot(df$Run, df$y, type="b", ylim=c(-10, 30), pch=19, cex=2, lwd=2)
abline(v=c(1:21), lty=3, col="grey")

points(df$Run, df$zero, lwd=2, col="darkgreen", type="l")
points(df$Run, df$LL1, type="l", col="orange")
points(df$Run, df$LL2, type="l", col="red")
points(df$Run, df$UL1, type="l", col="orange")
points(df$Run, df$UL2, type="l", col="red")

par(xpd=T)
text(rep(22, 5), c(14,11,6,20,28), c("zero", "LL1", "LL2", "UL1", "UL2"), pos=4, col=c("darkgreen", "orange", "red", "orange", "red"))
par(xpd=F)


plot(df$Run, df$y, type="b", ylim=c(-10, 30), pch=19, cex=2, lwd=2)
abline(v=c(1:21), lty=3, col="grey")

points(df$Run, df$zero, lwd=2, col="darkgreen", type="l")
points(df$Run, df$LL1, type="l", col="orange")
points(df$Run, df$LL2, type="l", col="red")
points(df$Run, df$UL1, type="l", col="orange")
points(df$Run, df$UL2, type="l", col="red")

flags <- data.frame(boundary.crossed=c("LL1", "LL2", "UL1", "UL2", "LL1", "LL1"),col=c("orange", "red", "orange", "red", "orange", "orange"), Run=c(2,3,9,10, 16, 19))
points(df$Run[flags$Run], df$y[flags$Run], cex=2,  col=as.character(flags$col), pch=19)

par(xpd=T)
text(rep(22, 5), c(14,11,6,20,28), c("zero", "LL1", "LL2", "UL1", "UL2"), pos=4, col=c("darkgreen", "orange", "red", "orange", "red"))
par(xpd=F)

enter image description here

欢迎任何建议!

此致 LUC

1 个答案:

答案 0 :(得分:1)

以下是基础R解决方案:

BoundaryCross <- function(myDf, keepSeparate=TRUE) {
    y <- myDf$y
    L2 <- myDf$LL2
    U2 <- myDf$UL2
    L1 <- myDf$LL1
    U1 <- myDf$UL1
    LimitTwo <- c(which(diff(y > L2)==-1L), which(diff(y > U2)==1L))+1L
    LimitOne <- c(which(diff(y > L1)==-1L), which(diff(y > U1)==1L))+1L

    ## do you won't to differentiate between the boundary crosses???
    if (keepSeparate) {
        list(YellowIndices=sort(LimitOne), RedIndices=sort(LimitTwo))
    } else {
        sort(c(LimitOne, LimitTwo))
    }
}

打电话给我们,我们获得:

BoundaryCross(df)
$YellowIndices
[1]  2  9 16 19

$RedIndices
[1]  3 10

如果您只是想集体识别指数,我们有:

BoundaryCross(df, FALSE)
[1]  2  3  9 10 16 19