Question

我试图弄清楚如何计算从一列表示True到另一列表示True时的行数。我尝试使用行程编码，但无法弄清楚如何从每列中获取交替值。

set.seed(42)
s<-sample(c(0,1,2,3),500,replace=T)
isOverbought<-s==1
isOverSold<-s==0
head(cbind(isOverbought,isOverSold),20)
res<-rle(isOverSold)
tt<-res[res$values==0] #getting when Oversold is true

 > head(cbind(isOverbought,isOverSold))

[1,]        FALSE      FALSE
[2,]        FALSE      FALSE
[3,]         TRUE      FALSE <-starting condition is overbought
[4,]        FALSE      FALSE
[5,]        FALSE      FALSE
[6,]        FALSE      FALSE
[7,]        FALSE      FALSE
[8,]        FALSE       TRUE <-is oversold. length from overbought to oversold = 5
[9,]        FALSE      FALSE
[10,]        FALSE      FALSE
[11,]         TRUE      FALSE <- is overbought. length from oversold to overbought = 3
[12,]        FALSE      FALSE
[13,]        FALSE      FALSE
[14,]         TRUE      FALSE
[15,]         TRUE      FALSE
[16,]        FALSE      FALSE
[17,]        FALSE      FALSE
[18,]        FALSE       TRUE <-is oversold. length from overbought to oversold = 7
[19,]         TRUE      FALSE <- is overbought. length from oversold to overbought = 1 
[20,]        FALSE      FALSE

目标

overboughtTOoversold oversoldTOoverbought
5                     3
7                     1

Answer 1

这个答案的假设是至少有一个超买/超卖过渡（任一方向），因此数据中至少有两行。通过计算超买和超卖条件的数量并确保两者都大于一，可以很容易地检查这种情况。

关键是要消除连续的超买和超卖情况，以便我们只有交替的超买和超卖情况。一种方法是：

## detect where we are overbought and oversold
i1 <- which(isOverbought)
i2 <- which(isOverSold)
## concatenate into one vector
i3 <- c(i1,i2)
## sort these and get the indices from the sort
i4 <- order(i3)
## at this point consecutive overbought or oversold conditions
## will be marked by a difference of 1 in i4 while alternating
## conditions will be marked by something other than 1. So 
## filter those out to get i6. BTW, consecutive here does not mean
## consecutive rows in the data but consecutive occurrence of 
## either overbought or oversold conditions without an intervening 
## condition of the other. The assumption for at least one transition 
## in the data is needed for this to work.
i5 <- diff(i4)
i6 <- i4[c(1,which(i5 != 1)+1)]
## then recover the alternating rows of overbought and oversold conditions in i7
i7 <- i3[i6]
## take the difference and format the output
## I need to credit @akrun for this part
i8 <- diff(i7)
## need to determine which is first
if (i1[1] < i2[1]) {
  overboughtTOoversold <- i8[c(TRUE, FALSE)]
  oversoldTOoverbought <- i8[c(FALSE, TRUE)]
} else {
  overboughtTOoversold <- i8[c(FALSE, TRUE)]
  oversoldTOoverbought <- i8[c(TRUE, FALSE)]  
}
d1 <- cbind(overboughtTOoversold, oversoldTOoverbought)
print(head(d1))
##     overboughtTOoversold oversoldTOoverbought
##[1,]                    5                    3
##[2,]                    7                    1
##[3,]                    3                    5
##[4,]                    8                    6
##[5,]                    2                    2
##[6,]                   10                    4

cbind可能会生成一个警告，指出列的长度不同。要摆脱这种情况，只需在最后用NA填充。

上述更紧凑的版本是：

i3 <- c(which(isOverbought), which(isOverSold))
i4 <- order(i3)
i8 <- diff(i3[i4[c(1,which(diff(i4) != 1)+1)]])
if (which(isOverbought)[1] < which(isOverSold)[1]) {
  overboughtTOoversold <- i8[c(TRUE, FALSE)]
  oversoldTOoverbought <- i8[c(FALSE, TRUE)]
} else {
  overboughtTOoversold <- i8[c(FALSE, TRUE)]
  oversoldTOoverbought <- i8[c(TRUE, FALSE)]  
}
d1 <- cbind(overboughtTOoversold, oversoldTOoverbought)

Answer 2

这足以解决您的问题。

## `a` to `b`
a2b <- function (a, b) {
  x <- which(a)    ## position of `TRUE` in `a`
  y <- which(b)    ## position of `TRUE` in `b`
  z <- which(a | b)   ## position of all `TRUE`
  end <- match(y, z)    ## match for end position
  start <- c(1L, end[-length(end)] + 1L)    ## start position
  valid <- end > start  ## remove cases with `end = start`
  z[end[valid]] - z[start[valid]]
  }

## cross `a` and `b`
axb <- function (a, b) {
  if (any(a & b))
    stop ("Invalid input! `a` and `b` can't have TRUE at the same time!")
  x <- a2b(a, b); y <- a2b(b, a)
  if (which(a)[1L] < which(b)[1L]) cbind(a2b = x, b2a = c(NA_integer_, y))
  else cbind(a2b = c(NA_integer_, x), b2a = y)
  }

对于isOverbought和isOverSold，我们会获得：

result <- axb(isOverbought, isOverSold)

head(result)
#     a2b b2a
#[1,]   5  NA
#[2,]   7   3
#[3,]   3   1
#[4,]   8   5
#[5,]   2   6
#[6,]  10   2

由于isOverbought在TRUE之前有第一个isOverSold，所以第二列的第一个元素是NA。

Answer 3

这是一个简短的版本：

创建一个名为mktState的向量。如果超买为TRUE则为1，如果超卖为TRUE，则为1;如果前两个为NA，则为FALSE。（您只对以下情况进行编码：市场状态转换）
使用na.locf填充NA以及最后一次观察结果
现在使用rle函数

mktState <- ifelse(df$overBought == TRUE,1,ifelse(df$overSold == TRUE,-1,NA)) mktState <- na.locf(mktState)

获得'超买'运行：

> rle(mktState)$lengths[rle(mktState)$values == 1]
 [1]  5  7  3  8  2 10  7  3  1  2  4  2  5  6  3 11  4  1  5  2  4  6  1  1  8
[26]  7  3  1  1  1  1  3  2  3  1  6  1  1  1  3  2  4  2  1  6  8  8  1  5 15
[51]  2  5  4  2  1  1  3  4  7  1  7 11  1  3  4  2  4  1

这将为您提供'超卖'运行：

> rle(mktState)$lengths[rle(mktState)$values == -1]
 [1]  3  1  5  6  2  4  1  4  3  3  3  5  2  4  1 14  2  2 10  3  7  1 13  1  1
[26]  3  3  1  6  5  2  1  8  7  2  3  1  1  3  5  1  1  2  3  1  2  2  3  3  1
[51]  8  9  4  2  1  6  2  1  3  2  4  5  1  3  7  4  2  2

Answer 4

这是[有点长] tidyverse版本：

library(dplyr)
library(tidyr)

# put vectors in a data.frame
data.frame(isOverbought, isOverSold) %>%
    # evaluate each row separately
    rowwise() %>% 
    # add column with name of event for any TRUE, else NA
    mutate(change_type = ifelse(isOverbought | isOverSold, names(.)[c(isOverbought, isOverSold)], NA)) %>% 
    # reset grouping
    ungroup() %>% 
    # replace NA values with last non-NA value
    fill(change_type) %>% 
    # add a column of the cumulate number of changes in change_type
    mutate(changes = data.table::rleid(change_type)) %>% 
    # count number of rows in each changes and change_type grouping
    count(changes, change_type) %>% 
    # remove leading NAs
    na.omit() %>% 
    # reset grouping
    ungroup() %>% 
    # edit change into runs of two with integer division
    mutate(changes = changes %/% 2) %>% 
    # spread to wide form
    spread(change_type, n) %>% 
    # get rid of extra column
    select(-changes)

## # A tibble: 68 x 2
##    isOverbought isOverSold
## *         <int>      <int>
## 1             5          3
## 2             7          1
## 3             3          5
## 4             8          6
## 5             2          2
## 6            10          4
## 7             7          1
## 8             3          4
## 9             1          3
## 10            2          3
## # ... with 58 more rows

计算交替列之间的长度

4 个答案: