从较长的向量中提取具有一定质量的向量

时间:2019-01-10 14:53:56

标签: r data.table

我有以下data.table:

# Example input: one row per trace. Time and Intensity hold comma-separated
# series stored as strings; Low and High are numeric bounds for each row.
DT <- data.table(
  A = rep(c("aa", "bb"), each = 2),
  B = rep(c("H", "Na"), each = 2),
  Low = c(0, 3, 1, 1),
  High = c(8, 10, 9, 8),
  Time = rep("0,1,2,3,4,5,6,7,8,9,10", times = 4),
  Intensity = c(
    "0,0,0,0,561464,0,0,0,0,0,0",
    "0,0,0,6548,5464,5616,0,0,0,68716,0",
    "5658,12,6548,6541,8,5646854,54565,56465,546,65,0",
    "0,561464,0,0,0,0,0,0,0,0,0"
  )
)

并使用以下代码计算强度高于某个值的连续点的最大个数。有关此计算方式的更详细说明,请参见 Reading and counting of consecutive points

# For every row of DT, count the longest run of consecutive interior points
# whose intensity exceeds the lower of the two boundary intensities.
#   u: time points of the row (character vector produced by strsplit)
#   v: intensities of the row (character vector produced by strsplit)
#   x: lower time bound (Low), y: upper time bound (High)
# Returns a one-row data.frame with the column Consec.Points.base.
newCols <- do.call(rbind, Map(function(u, v, x, y) {
  times <- as.numeric(u)
  intensities <- as.numeric(v)
  # Positions of the time points closest to the lower and upper bounds.
  lb <- which.min(abs(x - times))
  ub <- which.min(abs(y - times))
  # Points strictly between the two boundary positions. seq_len() gives an
  # empty index when the bounds are adjacent or equal, whereas
  # (lb + 1):(ub - 1) would count backwards and select the boundary points.
  interior <- intensities[lb + seq_len(max(ub - lb - 1L, 0L))]
  baseline <- min(intensities[c(lb, ub)])
  runs <- rle(interior > baseline)
  # Passing 0 as an extra candidate avoids the warning (and -Inf) that max()
  # produces when no run lies above the baseline.
  longest <- max(runs$lengths[runs$values], 0)
  data.frame(Consec.Points.base = longest)
},
strsplit(DT$Time, ","), strsplit(DT$Intensity, ","), DT$Low, DT$High))

# Attach the per-row counts to the original table.
DT <- cbind(DT, newCols)

我想知道,如何不只是得到 Consec.Points.base 的长度,而是把对应的实际数据点(时间和强度)提取为两个向量?提前非常感谢!

1 个答案:

答案 0 :(得分:1)

我认为这可以回答您的问题,但是如果我输入有误,或者需要进一步考虑/澄清,请告诉我。

# Example input: one row per trace. Time and Intensity hold comma-separated
# series stored as strings; Low and High are numeric bounds for each row.
DT <- data.table(
  A = rep(c("aa", "bb"), each = 2),
  B = rep(c("H", "Na"), each = 2),
  Low = c(0, 3, 1, 1),
  High = c(8, 10, 9, 8),
  Time = rep("0,1,2,3,4,5,6,7,8,9,10", times = 4),
  Intensity = c(
    "0,0,0,0,561464,0,0,0,0,0,0",
    "0,0,0,6548,5464,5616,0,0,0,68716,0",
    "5658,12,6548,6541,8,5646854,54565,56465,546,65,0",
    "0,561464,0,0,0,0,0,0,0,0,0"
  )
)

# Tag every row with its row number so the long table can be joined back.
DT[, i := .I]

# Explode the comma-separated strings into one record per (i, time point).
DT2 <- DT[, {
  time_values <- as.numeric(strsplit(Time, ",", fixed = TRUE)[[1]])
  intensity_values <- as.numeric(strsplit(Intensity, ",", fixed = TRUE)[[1]])
  .(Time = time_values, Intensity = intensity_values)
}, by = i]

# Bring the identifying columns and the bounds back onto the long table.
DT2 <- merge(DT2, DT[, .(i, A, B, Low, High)], by = "i")

# Keep only the points strictly between Low and High (bounds excluded).
DT2 <- DT2[between(Time, Low, High, incbounds = FALSE), ]

# Flag points whose intensity differs from the minimum of the points that
# remain for the same i.
DT2[, IntensityGood := Intensity != min(Intensity), by = i]

# encode each part of sequence with its own value, if not FALSE
#' Label each run of a vector with its run index, or 0 for FALSE runs
#'
#' The input is run-length encoded. Every run whose value is not FALSE is
#' labelled with its position among all runs; runs of FALSE are labelled 0.
#' The labels are then expanded back to the length of the input.
#'
#' @param x A vector (typically logical) to encode.
#' @return A numeric vector the same length as `x`.
encoder <- function(x) {
  runs <- rle(x)
  keep <- runs$values != FALSE
  # Start with 0 for every run, then overwrite the kept runs with their index.
  run_codes <- numeric(length(runs$values))
  run_codes[keep] <- which(keep)
  rep(run_codes, times = runs$lengths)
}
# Label the runs of IntensityGood within each i: 0 for FALSE runs, the run
# index for every other run (see encoder()).
DT2[, encodeI := encoder(IntensityGood), by = i]

# Drop every i whose points are all encoded 0; those can be handled separately.
# NOTE(review): `:=` assigns by reference, so DT2 itself presumably keeps the
# helper column `test`; only the filtered result stored in DT3 has it removed.
# Confirm whether that leftover column matters.
DT3 <- DT2[, test := all(encodeI==0), by=i][test==FALSE,][, test:=NULL]

# Length of the longest non-zero run per i. Any i missing from `count` can be
# inferred to have a count of 0. The result column is not named explicitly.
count <- DT3[encodeI!=0, .(max(table(encodeI))), by = i]

# Extract the points of the longest run.
# N = number of points in each non-zero run of each i.
findMaxDt <- DT3[encodeI != 0, .N, by=.(i, encodeI)]
# Attach N to the points. With merge()'s defaults only matching rows should
# remain, so rows with encodeI == 0 presumably drop out here (verify).
DT3 <- merge(DT3, findMaxDt, by=c("i", "encodeI"))
# Mark the points belonging to a run of maximal length within their i.
DT3 <- DT3[, Best := N==max(N), by=i]
# One list entry per i holding the Intensity values of the marked points.
# The result is not assigned; Time is not returned.
# NOTE(review): if several runs tie for the maximum length within one i,
# their intensities appear to be combined into a single vector; confirm
# whether that is intended.
DT3[Best==TRUE, .(list(Intensity)), by=i]