如何用指示它们开始和结束位置的值替换TRUE值序列?

时间:2016-05-22 19:00:30

标签: r

这似乎应该是which的简单应用,但我无法理解。我有一个矩阵,表明一个人是否在某个调查波中出现或缺席。我想将它转换为向量列表,矩阵每行一个列表元素,指示一个人出现的时间范围。这是我正在尝试做的一个工作示例:

in.wave <- structure(c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, 
FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, 
TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, 
TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, 
TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, 
TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, 
TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, 
FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, 
TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, 
FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, 
TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 
TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, 
TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, 
FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, 
TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, 
TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, 
TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, 
TRUE), .Dim = c(108L, 4L), .Dimnames = list(NULL, c("wave5", 
"wave6", "wave7", "wave8")))

head(in.wave)
#      wave5 wave6 wave7 wave8
# [1,]  TRUE  TRUE FALSE FALSE
# [2,]  TRUE  TRUE  TRUE FALSE
# [3,]  TRUE  TRUE  TRUE  TRUE
# [4,]  TRUE  TRUE  TRUE  TRUE
# [5,]  TRUE  TRUE  TRUE  TRUE
# [6,]  TRUE  TRUE  TRUE  TRUE

# Current approach is pure brute force:
possibilities <- expand.grid(list(c(T, F), c(T, F), c(T, F), c(T, F)))
output <- list(
  c(5.0, 8.0),
  c(5.4, 8.0),
  c(5.0, 5.4, 6.4, 8.0),
  c(6.4, 8.0),
  c(5.0, 6.4, 7.4, 8.0),
  c(5.4, 6.4, 7.4, 8.0),
  c(5.0, 5.4, 7.4, 8.0),
  c(7.4, 8.0),
  c(5.0, 7.4),
  c(5.4, 7.4),
  c(5.0, 5.4, 6.4, 7.4),
  c(6.4, 7.4),
  c(5.0, 6.4),
  c(5.4, 6.4),
  c(5.0, 5.4),
  c(0)
)

desired <- apply(in.wave, 1, function(trial) {
  output[[which(apply(possibilities, 1, function(x) all(trial == x)))]]
})

head(desired)
# [[1]]
# [1] 5.0 6.4
# 
# [[2]]
# [1] 5.0 7.4
# 
# [[3]]
# [1] 5 8
# 
# [[4]]
# [1] 5 8
# 
# [[5]]
# [1] 5 8
# 
# [[6]]
# [1] 5 8

如示例代码所示,我目前正在通过强力实现这一点 - 我列举了所有2 ^ 4种可能性,写下输出应该是什么,然后为{}的每一行查找正确的输出{1}}。由于我将这个扩展到8列,我宁愿不写出所有2 ^ 8种可能性。

所需输出是偶数长度的向量列表,其中每对元素指示某人何时进入和离开调查。因此,例如,如果你有一个人出现在所有波浪中,那么所需的输出将是向量in.wave,如果你有一个人在wave 7中缺席,那么所需的输出将是{ {1}}。

如果人们不能在中间的wave中丢失,您可以使用c(5.0, 8.0)c(5.0, 6.4, 7.4, 8.0)之类的内容来获取它们所在的值。但是多次使用法术会让我失望。任何想法如何简洁地解决这个问题?

2 个答案:

答案 0 :(得分:1)

根据42的建议,我整理了一个使用rle的功能。这似乎给出了正确的答案,但希望这里的其他人能有更优雅的解决方案。

makeJoinLeaveVector <- function(x) {
  # Recodes a logical vector into a set of paired values indicating when people
  # enter or leave a population
  #
  # Args:
  #   x: a logical vector
  #
  # Returns:
  #   A vector of length(rle(x)$values) * 2 with paired values indicating 0.6
  #   before each first value of TRUE in a set of them, and .4 after each last
  #   value of TRUE in a set of them.
  stopifnot(is.logical(x))

  # Get the run length encoding
  x.rle <- rle(x)

  # Now save a vector for each of the TRUE values
  is.true <- which(x.rle$values)

  out <- c(is.true, is.true + x.rle$lengths[is.true]) - 0.6
  names(out) <- NULL

  # Recode first and last possible values
  out <- ifelse(out == 0.4, 1.0, out)
  out <- ifelse(out == 4.4, 4.0, out)

  return(sort(out) + 4)
}

desired <- apply(in.wave, 1, makeJoinLeaveVector)

head(desired)
# [[1]]
# [1] 5.0 6.4
# 
# [[2]]
# [1] 5.0 7.4
# 
# [[3]]
# [1] 5 8
# 
# [[4]]
# [1] 5 8
# 
# [[5]]
# [1] 5 8
# 
# [[6]]
# [1] 5 8

答案 1 :(得分:0)

这不是更优雅,但我想我现在已经加了它。 代码的结束位可能写得更干净。

方法是计算in.wave[, 2:4] - in.wave[, 1:3],当某人退出调查时给出-1,当有人进入调查时给出1。

times <- c(5, 5.4, 6.4, 7.4, 8)

# exiting gives a -1
#   entering gives a 1
#   No change gives a 0
transitions <-  in.wave[, 2:4] - in.wave[, 1:3]

# Find the entrance and exit points of the survey
exits <- apply(transitions, 1, function(x) times[which(x == -1) + 1])
entrances <- apply(transitions, 1, function(x) times[which(x == 1) + 1])


# Combine entrances and exits
desired <- lapply(seq_len(nrow(in.wave)), function(x) sort(c(entrances[[x]], exits[[x]])))

# If no entrances, subject must have been in study from the beginning
desired <- 
  lapply(seq_len(nrow(in.wave)), function(x) if(length(entrances[[x]]) == 0){
                                               c(times[1], desired[[x]])
                                             } else {
                                               desired[[x]]
                                             })
# If no exits, subject must have remained in study until the end.
desired <- 
  lapply(seq_len(nrow(in.wave)), function(x) if(length(exits[[x]]) == 0){
                                               c(desired[[x]], times[5])
                                             } else {
                                               desired[[x]]
                                             })