这似乎应该是which
的简单应用,但我无法理解。我有一个矩阵,表明一个人是否在某个调查波中出现或缺席。我想将它转换为向量列表,矩阵每行一个列表元素,指示一个人出现的时间范围。这是我正在尝试做的一个工作示例:
in.wave <- structure(c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE,
FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE,
TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE,
TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE,
TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE,
TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE,
TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE,
FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE,
TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE,
FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE,
TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE,
TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE,
FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE,
TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE,
TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE,
TRUE), .Dim = c(108L, 4L), .Dimnames = list(NULL, c("wave5",
"wave6", "wave7", "wave8")))
head(in.wave)
# wave5 wave6 wave7 wave8
# [1,] TRUE TRUE FALSE FALSE
# [2,] TRUE TRUE TRUE FALSE
# [3,] TRUE TRUE TRUE TRUE
# [4,] TRUE TRUE TRUE TRUE
# [5,] TRUE TRUE TRUE TRUE
# [6,] TRUE TRUE TRUE TRUE
# Current approach is pure brute force:
possibilities <- expand.grid(list(c(T, F), c(T, F), c(T, F), c(T, F)))
output <- list(
c(5.0, 8.0),
c(5.4, 8.0),
c(5.0, 5.4, 6.4, 8.0),
c(6.4, 8.0),
c(5.0, 6.4, 7.4, 8.0),
c(5.4, 6.4, 7.4, 8.0),
c(5.0, 5.4, 7.4, 8.0),
c(7.4, 8.0),
c(5.0, 7.4),
c(5.4, 7.4),
c(5.0, 5.4, 6.4, 7.4),
c(6.4, 7.4),
c(5.0, 6.4),
c(5.4, 6.4),
c(5.0, 5.4),
c(0)
)
desired <- apply(in.wave, 1, function(trial) {
output[[which(apply(possibilities, 1, function(x) all(trial == x)))]]
})
head(desired)
# [[1]]
# [1] 5.0 6.4
#
# [[2]]
# [1] 5.0 7.4
#
# [[3]]
# [1] 5 8
#
# [[4]]
# [1] 5 8
#
# [[5]]
# [1] 5 8
#
# [[6]]
# [1] 5 8
如示例代码所示,我目前正在通过强力实现这一点 - 我列举了所有2 ^ 4种可能性,写下输出应该是什么,然后为{}的每一行查找正确的输出{1}}。由于我将这个扩展到8列,我宁愿不写出所有2 ^ 8种可能性。
所需输出是偶数长度的向量列表,其中每对元素指示某人何时进入和离开调查。因此,例如,如果你有一个人出现在所有波浪中,那么所需的输出将是向量in.wave
,如果你有一个人在wave 7中缺席,那么所需的输出将是{ {1}}。
如果人们不能在中间的wave中丢失,您可以使用c(5.0, 8.0)
和c(5.0, 6.4, 7.4, 8.0)
之类的内容来获取它们所在的值。但是多次使用法术会让我失望。任何想法如何简洁地解决这个问题?
答案 0 :(得分:1)
根据42的建议,我整理了一个使用rle
的功能。这似乎给出了正确的答案,但希望这里的其他人能有更优雅的解决方案。
makeJoinLeaveVector <- function(x) {
# Recodes a logical vector into a set of paired values indicating when people
# enter or leave a population
#
# Args:
# x: a logical vector
#
# Returns:
# A vector of length(rle(x)$values) * 2 with paired values indicating 0.6
# before each first value of TRUE in a set of them, and .4 after each last
# value of TRUE in a set of them.
stopifnot(is.logical(x))
# Get the run length encoding
x.rle <- rle(x)
# Now save a vector for each of the TRUE values
is.true <- which(x.rle$values)
out <- c(is.true, is.true + x.rle$lengths[is.true]) - 0.6
names(out) <- NULL
# Recode first and last possible values
out <- ifelse(out == 0.4, 1.0, out)
out <- ifelse(out == 4.4, 4.0, out)
return(sort(out) + 4)
}
desired <- apply(in.wave, 1, makeJoinLeaveVector)
head(desired)
# [[1]]
# [1] 5.0 6.4
#
# [[2]]
# [1] 5.0 7.4
#
# [[3]]
# [1] 5 8
#
# [[4]]
# [1] 5 8
#
# [[5]]
# [1] 5 8
#
# [[6]]
# [1] 5 8
答案 1 :(得分:0)
这不是更优雅,但我想我现在已经加了它。 代码的结束位可能写得更干净。
方法是计算in.wave[, 2:4] - in.wave[, 1:3]
,当某人退出调查时给出-1,当有人进入调查时给出1。
times <- c(5, 5.4, 6.4, 7.4, 8)
# exiting gives a -1
# entering gives a 1
# No change gives a 0
transitions <- in.wave[, 2:4] - in.wave[, 1:3]
# Find the entrance and exit points of the survey
exits <- apply(transitions, 1, function(x) times[which(x == -1) + 1])
entrances <- apply(transitions, 1, function(x) times[which(x == 1) + 1])
# Combine entrances and exits
desired <- lapply(seq_len(nrow(in.wave)), function(x) sort(c(entrances[[x]], exits[[x]])))
# If no entrances, subject must have been in study from the beginning
desired <-
lapply(seq_len(nrow(in.wave)), function(x) if(length(entrances[[x]]) == 0){
c(times[1], desired[[x]])
} else {
desired[[x]]
})
# If no exits, subject must have remained in study until the end.
desired <-
lapply(seq_len(nrow(in.wave)), function(x) if(length(exits[[x]]) == 0){
c(desired[[x]], times[5])
} else {
desired[[x]]
})