我想知道R中是否存在适当的区间操作和比较框架。
经过一番搜索后,我才能找到以下内容: - 基本包中的函数findInterval。 (但我很难理解) - 这里和那里有关于联合和交集的一些答案(特别是:http://r.789695.n4.nabble.com/Union-Intersect-two-continuous-sets-td4224545.html)
您是否知道实施一套全面的工具以轻松处理间隔操作中的频繁任务,例如包含/ setdiff / union / intersection /等。 (例如,请参阅此处获取功能列表)? 或者你会有建议这种方法的建议吗?
以下是我这方面的一些草案。它肯定很尴尬,仍然有一些错误,但它可能说明我在寻找什么。UNION
interval_union <- function(df){ # for data frame
df <- interval_clean(df)
if(is.empty(df)){
return(as.data.frame(NULL))
} else {
if(is.POSIXct(df[,1])) {
dated <- TRUE
df <- colwise(as.numeric)(df)
} else {
dated <- FALSE
}
M <- as.matrix(df)
o <- order(c(M[, 1], M[, 2]))
n <- cumsum( rep(c(1, -1), each=nrow(M))[o])
startPos <- c(TRUE, n[-1]==1 & n[-length(n)]==0)
endPos <- c(FALSE, n[-1]==0 & n[-length(n)]==1)
M <- M[o]
if(dated == TRUE) {
df2 <- colwise(mkDateTime)(as.data.frame(cbind(M[startPos], M[endPos])), from.s = TRUE)
} else {
df2 <- as.data.frame(cbind(M[startPos], M[endPos]))
}
colnames(df2) <- colnames(df)
# print(df2)
return(df2)
}
}
union_1_1 <- function(test, ref){
names(ref) <- names(test)
tmp <- interval_union(as.data.frame(rbind(test, ref)))
return(tmp)
}
union_1_n <- function(test, ref){
return(union_1_1(test, ref))
}
union_n_n <- function(test, ref){
testnn <- adply(.data = test, 1, union_1_n, ref, .expand = FALSE)
return(testnn)
}
ref_interval_union <- function(df, ref){
tmp0 <- adply(df, 1, union_1_1, ref, .expand = FALSE) # set to FALSE to keep ID
return(tmp0)
}
交会
interval_intersect <- function(df){
# adapted from : http://r.789695.n4.nabble.com/Union-Intersect-two-continuous-sets-td4224545.html
M <- as.matrix(df)
L <- max(M[, 1])
R <- min(M[, 2])
Inew <- if (L <= R) c(L, R) else c()
if (!is.empty(Inew)){
df2 <- t(as.data.frame(Inew))
colnames(df2) <- colnames(df)
rownames(df2) <- NULL
} else {
df2 <- NULL
}
return(as.data.frame(df2))
}
ref_interval_intersect <- function(df, ref){
tmpfun <- function(a, b){
names(b) <- names(a)
tmp <- interval_intersect(as.data.frame(rbind(a, b)))
return(tmp)
}
tmp0 <- adply(df, 1, tmpfun, ref, .expand = FALSE) # [,3:4]
#if(!is.empty(tmp0)) colnames(tmp0) <- colnames(df)
return(tmp0)
}
int_1_1 <- function(test, ref){
te <- as.vector(test)
re <- as.vector(ref)
names(re) <- names(te)
tmp0 <- c(max(te[1, 1], re[1, 1]), min(te[1, 2], re[1, 2]))
if(tmp0[1]>tmp0[2]) tmp0 <- NULL # inverse of a correct interval --> VOID
if(!is.empty(tmp0)){
tmp1 <- colwise(mkDateTime)(as.data.frame(t(as.data.frame(tmp0))))
colnames(tmp1) <- colnames(test)
} else {
tmp1 <- data.frame(NULL)
}
return(tmp1)
}
int_1_n <- function(test, ref){
test1 <- adply(.data = ref, 1, int_1_1, test = test, .expand = FALSE)
if(is.empty(test1)){
return(data.frame(NULL))
} else {
testn <- interval_union(test1[,2:3])
return(testn)
}
}
int_n_n <- function(test, ref){
testnn <- adply(.data = test, 1, int_1_n, ref, .expand = FALSE)
# return(testnn[,2:3]) # return interval set without index (1st column)
return(testnn) # return interval set with index (1st column) --> usefull to go with merge to keep metadata going alon g with interval description
}
int_intersect <- function(df, ref){
mycols <- colnames(df)
df$X1 <- 1:nrow(df)
test <- df[, 1:2]
tmp <- int_n_n(test, ref)
intersection <- merge(tmp, df, by = "X1", suffixes = c("", "init"))
return(intersection[,mycols])
}
排除
excl_1_1 <- function(test, ref){
te <- as.vector(test)
re <- as.vector(ref)
names(re) <- names(te)
if(te[1] < re[1]){ # Lower Bound
if(te[2] > re[1]){ # overlap
x <- unlist(c(te[1], re[1]))
} else { # no overlap
x <- unlist(c(te[1], te[2]))
}
} else { # test > ref on lower bound side
x <- NULL
}
if(te[2] > re[2]){ # Upper Bound
if(te[1] < re[2]){ # overlap
y <- unlist(c(re[2], te[2]))
} else { # no overlap
y <- unlist(c(te[1], te[2]))
}
} else { # test < ref on upper bound side
y <- NULL
}
if(is.empty(x) & is.empty(y)){
tmp0 <- NULL
tmp1 <- tmp0
} else {
tmp0 <- as.data.frame(rbind(x, y))
colnames(tmp0) <- colnames(test)
tmp1 <- interval_union(tmp0)
}
return(tmp1)
}
excl_1_n <- function(test, ref){
testn0 <- adply(.data = ref, 1, excl_1_1, test = test, .expand=FALSE)
# boucle pour intersecter successivement les intervalles sets, pour gérer les intervalles disjoints (identifiés par X1, col1)
tmp <- range(testn0)
names(tmp) <- colnames(testn0)[2:3]
tmp <- as.data.frame(t(tmp))
for(i in unique(testn0[,1])){
tmp <- int_n_n(tmp, testn0[testn0[,1]==i, 2:3])
}
return(tmp)
}
夹杂
incl_1_1 <- function(test, ref){
te <- as.vector(test)
re <- as.vector(ref)
if(te[1] >= re[1] & te[2] <= re[2]){ return(TRUE) } else { return(FALSE) }
}
incl_1_n <- function(test, ref){
testn <- adply(.data = ref, 1, incl_1_1, test = test)
return(any(testn[,ncol(testn)]))
}
incl_n_n <- function(test, ref){
testnn <- aaply(.data = test, 1, incl_1_n, ref, .expand = FALSE)
names(testnn) <- NULL
return(testnn)
}
flat_incl_n_n <- function(test, ref){
ref <- interval_union(ref)
return(incl_n_n(test, ref))
}
# testing for a vector, instead of an interval set
incl_x_1 <- function(x, ref){
test <- (x>=ref[1,1] & x<ref[1,2])
return(test)
}
incl_x_n <- function(x, ref){
test <- any(x>=ref[,1] & x<ref[,2])
return(test)
}
答案 0 :(得分:7)
我认为您可以充分利用sets
包中许多与时间间隔相关的功能。
这是一个小例子,说明了软件包对区间构造,交集,集差异,并集和互补的支持,以及它在区间中包含的测试。这些和许多其他相关功能都记录在?interval
的帮助页面上。
library(sets)
i1 <- interval(1,6)
i2 <- interval(5,10)
i3 <- interval(200,400)
i4 <- interval(202,402)
i5 <- interval_union(interval_intersection(i1,i2),
interval_symdiff(i3,i4))
i5
# [5, 6] U [200, 202) U (400, 402]
interval_complement(i5)
# [-Inf, 5) U (6, 200) U [202, 400] U (402, Inf]
interval_contains_element(i5, 5.5)
# [1] TRUE
interval_contains_element(i5, 201)
# [1] TRUE
如果您的时间间隔目前以两列data.frame进行编码,则可以使用mapply()
之类的内容将其转换为sets
包使用的类型的时间间隔:
df <- data.frame(lBound = c(1,5,100), uBound = c(10, 6, 200))
Ints <- with(df, mapply("interval", l=lBound, r=uBound, SIMPLIFY=FALSE))
Ints
# [[1]]
# [1, 10]
# [[2]]
# [5, 6]
# [[3]]
# [100, 200]