我试图弄清楚如何计算从一列表示True到另一列表示True时的行数。我尝试使用行程编码,但无法弄清楚如何从每列中获取交替值。
set.seed(42)
s<-sample(c(0,1,2,3),500,replace=T)
isOverbought<-s==1
isOverSold<-s==0
head(cbind(isOverbought,isOverSold),20)
res<-rle(isOverSold)
tt<-res[res$values==0] #getting when Oversold is true
> head(cbind(isOverbought,isOverSold))
[1,] FALSE FALSE
[2,] FALSE FALSE
[3,] TRUE FALSE <-starting condition is overbought
[4,] FALSE FALSE
[5,] FALSE FALSE
[6,] FALSE FALSE
[7,] FALSE FALSE
[8,] FALSE TRUE <-is oversold. length from overbought to oversold = 5
[9,] FALSE FALSE
[10,] FALSE FALSE
[11,] TRUE FALSE <- is overbought. length from oversold to overbought = 3
[12,] FALSE FALSE
[13,] FALSE FALSE
[14,] TRUE FALSE
[15,] TRUE FALSE
[16,] FALSE FALSE
[17,] FALSE FALSE
[18,] FALSE TRUE <-is oversold. length from overbought to oversold = 7
[19,] TRUE FALSE <- is overbought. length from oversold to overbought = 1
[20,] FALSE FALSE
目标
overboughtTOoversold oversoldTOoverbought
5 3
7 1
答案 0 :(得分:2)
这个答案的假设是至少有一个超买/超卖过渡(任一方向),因此数据中至少有两行。通过计算超买和超卖条件的数量并确保两者都大于一,可以很容易地检查这种情况。
关键是要消除连续的超买和超卖情况,以便我们只有交替的超买和超卖情况。一种方法是:
## detect where we are overbought and oversold
i1 <- which(isOverbought)
i2 <- which(isOverSold)
## concatenate into one vector
i3 <- c(i1,i2)
## sort these and get the indices from the sort
i4 <- order(i3)
## at this point consecutive overbought or oversold conditions
## will be marked by a difference of 1 in i4 while alternating
## conditions will be marked by something other than 1. So
## filter those out to get i6. BTW, consecutive here does not mean
## consecutive rows in the data but consecutive occurrence of
## either overbought or oversold conditions without an intervening
## condition of the other. The assumption for at least one transition
## in the data is needed for this to work.
i5 <- diff(i4)
i6 <- i4[c(1,which(i5 != 1)+1)]
## then recover the alternating rows of overbought and oversold conditions in i7
i7 <- i3[i6]
## take the difference and format the output
## I need to credit @akrun for this part
i8 <- diff(i7)
## need to determine which is first
if (i1[1] < i2[1]) {
overboughtTOoversold <- i8[c(TRUE, FALSE)]
oversoldTOoverbought <- i8[c(FALSE, TRUE)]
} else {
overboughtTOoversold <- i8[c(FALSE, TRUE)]
oversoldTOoverbought <- i8[c(TRUE, FALSE)]
}
d1 <- cbind(overboughtTOoversold, oversoldTOoverbought)
print(head(d1))
## overboughtTOoversold oversoldTOoverbought
##[1,] 5 3
##[2,] 7 1
##[3,] 3 5
##[4,] 8 6
##[5,] 2 2
##[6,] 10 4
cbind
可能会生成一个警告,指出列的长度不同。要摆脱这种情况,只需在最后用NA
填充。
上述更紧凑的版本是:
i3 <- c(which(isOverbought), which(isOverSold))
i4 <- order(i3)
i8 <- diff(i3[i4[c(1,which(diff(i4) != 1)+1)]])
if (which(isOverbought)[1] < which(isOverSold)[1]) {
overboughtTOoversold <- i8[c(TRUE, FALSE)]
oversoldTOoverbought <- i8[c(FALSE, TRUE)]
} else {
overboughtTOoversold <- i8[c(FALSE, TRUE)]
oversoldTOoverbought <- i8[c(TRUE, FALSE)]
}
d1 <- cbind(overboughtTOoversold, oversoldTOoverbought)
答案 1 :(得分:2)
这足以解决您的问题。
## `a` to `b`
a2b <- function (a, b) {
x <- which(a) ## position of `TRUE` in `a`
y <- which(b) ## position of `TRUE` in `b`
z <- which(a | b) ## position of all `TRUE`
end <- match(y, z) ## match for end position
start <- c(1L, end[-length(end)] + 1L) ## start position
valid <- end > start ## remove cases with `end = start`
z[end[valid]] - z[start[valid]]
}
## cross `a` and `b`
axb <- function (a, b) {
if (any(a & b))
stop ("Invalid input! `a` and `b` can't have TRUE at the same time!")
x <- a2b(a, b); y <- a2b(b, a)
if (which(a)[1L] < which(b)[1L]) cbind(a2b = x, b2a = c(NA_integer_, y))
else cbind(a2b = c(NA_integer_, x), b2a = y)
}
对于isOverbought
和isOverSold
,我们会获得:
result <- axb(isOverbought, isOverSold)
head(result)
# a2b b2a
#[1,] 5 NA
#[2,] 7 3
#[3,] 3 1
#[4,] 8 5
#[5,] 2 6
#[6,] 10 2
由于isOverbought
在TRUE
之前有第一个isOverSold
,所以第二列的第一个元素是NA
。
答案 2 :(得分:2)
这是一个简短的版本:
TRUE
则为1,如果超卖为TRUE
,则为1;如果前两个为NA
,则为FALSE
。(您只对以下情况进行编码:市场状态转换)na.locf
填充NA
以及最后一次观察结果现在使用rle
函数
mktState <- ifelse(df$overBought == TRUE,1,ifelse(df$overSold == TRUE,-1,NA))
mktState <- na.locf(mktState)
获得'超买'运行:
> rle(mktState)$lengths[rle(mktState)$values == 1]
[1] 5 7 3 8 2 10 7 3 1 2 4 2 5 6 3 11 4 1 5 2 4 6 1 1 8
[26] 7 3 1 1 1 1 3 2 3 1 6 1 1 1 3 2 4 2 1 6 8 8 1 5 15
[51] 2 5 4 2 1 1 3 4 7 1 7 11 1 3 4 2 4 1
这将为您提供'超卖'运行:
> rle(mktState)$lengths[rle(mktState)$values == -1]
[1] 3 1 5 6 2 4 1 4 3 3 3 5 2 4 1 14 2 2 10 3 7 1 13 1 1
[26] 3 3 1 6 5 2 1 8 7 2 3 1 1 3 5 1 1 2 3 1 2 2 3 3 1
[51] 8 9 4 2 1 6 2 1 3 2 4 5 1 3 7 4 2 2
答案 3 :(得分:0)
这是[有点长] tidyverse版本:
library(dplyr)
library(tidyr)
# put vectors in a data.frame
data.frame(isOverbought, isOverSold) %>%
# evaluate each row separately
rowwise() %>%
# add column with name of event for any TRUE, else NA
mutate(change_type = ifelse(isOverbought | isOverSold, names(.)[c(isOverbought, isOverSold)], NA)) %>%
# reset grouping
ungroup() %>%
# replace NA values with last non-NA value
fill(change_type) %>%
# add a column of the cumulate number of changes in change_type
mutate(changes = data.table::rleid(change_type)) %>%
# count number of rows in each changes and change_type grouping
count(changes, change_type) %>%
# remove leading NAs
na.omit() %>%
# reset grouping
ungroup() %>%
# edit change into runs of two with integer division
mutate(changes = changes %/% 2) %>%
# spread to wide form
spread(change_type, n) %>%
# get rid of extra column
select(-changes)
## # A tibble: 68 x 2
## isOverbought isOverSold
## * <int> <int>
## 1 5 3
## 2 7 1
## 3 3 5
## 4 8 6
## 5 2 2
## 6 10 4
## 7 7 1
## 8 3 4
## 9 1 3
## 10 2 3
## # ... with 58 more rows