获取与值匹配的data.table列的索引

时间:2019-11-07 13:47:15

标签: r dplyr data.table

DT = data.table(
      id = 1:5,
      a  = c(0,1,0,2,5),
      b  = c(1,0,2,4,4),
      c  = c(1,2,0,0,5))

#     id a b c
# 1:  1  0 1 1
# 2:  2  1 0 2
# 3:  3  0 2 0
# 4:  4  2 4 0
# 5:  5  5 4 5

我想从左侧开始确定第一列为0,并将列索引放在idx中。

#     id a b c idx
# 1:  1  0 1 1 2 
# 2:  2  1 0 2 3
# 3:  3  0 2 0 2
# 4:  4  2 4 0 4
# 5:  5  5 4 5 NA

(也欢迎使用非data.table的解决方案,例如dplyr

5 个答案:

答案 0 :(得分:3)

通过R为底的想法可以是

private inner class NoteViewHolder(view: View) : RecyclerView.ViewHolder(view) {

    val rowName: TextView = view.findViewById(R.id.rec_name)
    val rowType: TextView = view.findViewById(R.id.rec_type)
    val rowSub: TextView = view.findViewById(R.id.rec_sub)
    val daysAgo: TextView = view.findViewById(R.id.how_many_days_ago)

    init {
        view.setOnLongClickListener {
            val builder = AlertDialog.Builder(
                requireContext(),
                R.style.Theme_AppCompat_Light_Dialog_Alert
            )
            builder.setTitle("Notes Returned?")
            builder.setNegativeButton("Yes") { dialog, _ ->
                dialog.dismiss()
                context.db.myDao().deleteData(users[adapterPosition])
                notifyItemRemoved(adapterPosition)
            }
            val alert = builder.create()
            alert.show()
            true
        }
    }
}

答案 1 :(得分:2)

一种dplyr可能是:

DT %>%
 mutate(idx = if_else(rowSums(. == 0) == 0,
                      NA_integer_,
                      max.col(- ., ties.method = "first")))

  id a b c idx
1  1 0 1 1   2
2  2 1 0 2   3
3  3 0 2 0   2
4  4 2 4 0   4
5  5 5 4 5  NA

data.table中的内容相同:

DT[, idx := ifelse(rowSums(.SD == 0) == 0,
                   NA_integer_,
                   max.col(- .SD, ties.method = "first"))]

答案 2 :(得分:2)

data.table解决方案可能是:

DT[, idx := apply(.SD, 1, function(x) first(which(x == 0 )))]

#   id a b c idx
#1:  1 0 1 1   2
#2:  2 1 0 2   3
#3:  3 0 2 0   2
#4:  4 2 4 0   4
#5:  5 5 4 5  NA

答案 3 :(得分:2)

DT[, idx := which(.SD == 0)[1] + 1L, by = id]
DT
#    id a b c idx
# 1:  1 0 1 1   2
# 2:  2 1 0 2   3
# 3:  3 0 2 0   2
# 4:  4 2 4 0   4
# 5:  5 5 4 5  NA

答案 4 :(得分:0)

评论中的时间太长,因此是comm Wiki。与Sotos和tmfmnk相似,如果在id中找不到值,那么可以避免使用rowSums

DT[, idx := {
    x <- max.col(.SD==0, "first")
    replace(x, x==1L, NA_integer_)
}]

计时代码:

library(data.table)
set.seed(0L)
nr <- 1e7
DT <- data.table(id=1:nr, a=sample(0:5, nr, TRUE), b=sample(0:5, nr, TRUE), c=sample(0:5, nr, TRUE))
DT0 <- copy(DT)
DT1 <- copy(DT)
DT2 <- copy(DT)
DT3 <- copy(DT)
DT4 <- copy(DT)

mtd0 <- function() {
    replace(max.col(-DT0[, -1] == 0, ties.method = 'first') + 1, rowSums(DT == 0) == 0, NA)

    #or break it into two lines If you want,
    i1 <- max.col(-DT0[,-1] == 0, ties.method = 'first') + 1
    replace(i1, rowSums(DT0 == 0) == 0, NA)
}

mtd1 <- function() {
    DT1[, idx := apply(.SD, 1, function(x) first(which(x == 0 )))]
}

mtd2 <- function() {
    DT2[, idx := which(.SD == 0)[1], by = id]
}

mtd3 <- function() {
    DT2[, idx := fifelse(rowSums(.SD == 0) == 0,
        NA_integer_,
        max.col(-.SD, ties.method = "first"))]
}

mtd4 <- function() {
    DT4[, idx := {
        x <- max.col(.SD==0, "first")
        replace(x, x==1L, NA_integer_)
    }]
}

bench::mark(mtd0(), mtd1(), mtd2(), 
    mtd3(), mtd4(), check=FALSE)

时间:

# A tibble: 5 x 13
  expression      min   median `itr/sec` mem_alloc `gc/sec` n_itr  n_gc total_time result    memory    time   gc    
  <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl> <int> <dbl>   <bch:tm> <list>    <list>    <list> <list>
1 mtd0()     136.98ms 145.67ms    5.41     347.8MB    1.80      3     1    554.1ms <dbl [1,~ <df[,3] ~ <bch:~ <tibb~
2 mtd1()        8.66s    8.66s    0.115     76.3MB    0.346     1     3      8.66s <df[,5] ~ <df[,3] ~ <bch:~ <tibb~
3 mtd2()        57.3s    57.3s    0.0175    22.6MB    0.436     1    25      57.3s <df[,5] ~ <df[,3] ~ <bch:~ <tibb~
4 mtd3()       86.6ms  90.69ms   10.3      171.7MB    1.72      6     1   582.21ms <df[,5] ~ <df[,3] ~ <bch:~ <tibb~
5 mtd4()       48.9ms  50.12ms   18.4       97.6MB    1.84     10     1   544.43ms <df[,5] ~ <df[,3] ~ <bch:~ <tibb~