对于每个组,用第一个 transaction 为 1 的观测值减去最后一个 transaction 为 0 的观测值

时间:2016-11-29 05:54:57

标签: r dplyr

很抱歉,标题看起来很混乱。在以下数据中,

# Sample data: one row per event, holding the id it belongs to, a
# transaction flag (0 or 1) and the time of the event.
mydata <- data.frame(
  id = c("1", "2", "1", "1", "2", "1"),
  transaction = c(0, 0, 1, 0, 1, 1),
  time = c(10, 20, 22, 27, 39, 47)
)

我有一些访问(transaction = 0)和一些交易(transaction = 1)。我想知道每个 id 的每次交易与该交易之前最后一次访问之间的时间差:例如,id 为 1 的用户在时间 22 发生交易,而他在此之前的最后一次访问时间是 10,因此时间差为 12 个单位。这是我的数据:

  id transaction time
1  1           0   10
2  2           0   20
3  1           1   22
4  1           0   27
5  2           1   39
6  1           1   47

我希望得到以下结果:

   id transaction time  dif
1  1           0   10   NA
2  2           0   20   NA
3  1           1   22   12
4  1           0   27   NA
5  2           1   39   19
6  1           1   47   20

我尝试了类似的东西,这显然是错误的

library(dplyr)
# Asker's attempt, kept as posted. The pipeline starts with %>% and has no
# data frame on its left-hand side, so it does not parse as written.
%>%
group_by( id) %>%

# first(...) and last(...) each return a single value per id, so every row of
# a group would receive the same dif instead of a per-transaction difference.
mutate(  dif =first(time[transaction == 1])-last(time[transaction == 0]))

5 个答案:

答案 0 :(得分:1)

# For every transaction row (transaction != 0), compute the time elapsed since
# the most recent visit (transaction == 0) of the same id; visit rows get NA.
# Assumes rows are already ordered by time within each id.
mydata %>%
  group_by(id) %>%
  mutate(
    # c(NA, visit times)[visits seen so far + 1] looks up the latest visit time
    # for each row and yields NA while the id has not had any visit yet.
    time1 = ifelse(
      transaction != 0,
      time - c(NA, time[transaction == 0])[cumsum(transaction == 0) + 1],
      NA
    )
  )

# Values produced for the question's data:
#   id transaction time time1
# 1  1           0   10    NA
# 2  2           0   20    NA
# 3  1           1   22    12
# 4  1           0   27    NA
# 5  2           1   39    19
# 6  1           1   47    20

答案 1 :(得分:1)

我们可以使用 data.table,通过赋值运算符(:=)按引用修改数据,从而避免任何复制

library(data.table)
# First attempt (superseded by the update below): per id, subtract the visit
# times (transaction == 0) from the transaction times (transaction != 0),
# pairing them by position, then reset dif to NA on the visit rows.
# NOTE(review): the per-id result is shorter than the group whenever an id has
# both kinds of rows, so the values need not line up with the rows they
# describe -- presumably the mismatch the update mentions; confirm.
setDT(mydata)[, dif:=time[transaction!=0]- time[transaction==0], 
                               by = id][transaction==0, dif:= NA][]

更新

之前有一个值不匹配。以下代码修复了它

# Step 1 (by id): ind numbers the visit/transaction cycles. diff(transaction==1)
# is negative exactly where a 0 row follows a 1 row, and the cumulative sum of
# those break points (seeded with TRUE, so counting starts at 1) labels each
# cycle.
# Step 2 (by id and ind): dif is the transaction time minus the visit time
# within the same cycle.
# Step 3: rows with transaction == 0 get dif reset to NA, the helper column
# ind is removed, and the trailing [] prints the result.
# NOTE(review): assumes each cycle contains a single visit row -- confirm
# before using on data with repeated visits.
setDT(mydata)[, ind := cumsum(c(TRUE, diff(transaction==1)<0)),
 id][, dif := time[transaction==1]-time[transaction==0], .(id, ind)
    ][transaction==0, dif:= NA][, ind := NULL][]
#    id transaction time dif
#1:  1           0   10  NA
#2:  2           0   20  NA
#3:  1           1   22  12
#4:  1           0   27  NA
#5:  2           1   39  19
#6:  1           1   47  20

答案 2 :(得分:1)

使用 base R(假设每个 id 至少有一个 transaction = 0 的观测值和一个 transaction = 1 的观测值)

 # For each transaction row (transaction == 1), compute the time elapsed since
 # the most recent visit (transaction == 0) of the same id; all other rows get
 # NA. Assumes rows are already ordered by time within each id.
 # unsplit() puts the per-id results back in the original row order, so values
 # cannot end up on rows of a different id.
 mydata$dif <- unsplit(
   lapply(split(mydata, mydata$id), function(x) {
     is_visit <- x$transaction == 0
     # c(NA, visit times)[visits seen so far + 1] is the latest visit time for
     # each row, or NA while the id has not had any visit yet.
     last_visit <- c(NA, x$time[is_visit])[cumsum(is_visit) + 1]
     ifelse(x$transaction == 1, x$time - last_visit, NA)
   }),
   mydata$id
 )

 # Values produced for the question's data:
 #   id transaction time dif
 # 1  1           0   10  NA
 # 2  2           0   20  NA
 # 3  1           1   22  12
 # 4  1           0   27  NA
 # 5  2           1   39  19
 # 6  1           1   47  20

答案 3 :(得分:1)

这也是一个选项 -

# Sample data: one row per event with its id, a transaction flag (0 = visit,
# 1 = transaction) and the event time.
mydata <- data.frame(
  id = c("1", "2", "1", "1", "2", "1"),
  transaction = c(0, 0, 1, 0, 1, 1),
  time = c(10, 20, 22, 27, 39, 47)
)

# Distinct ids to process, one pass per id.
id_types <- levels(factor(mydata$id))

# dif: for each transaction row, the time elapsed since the most recent visit
# of the same id; NA on visit rows and on transactions with no earlier visit.
# Assumes rows are already ordered by time within each id.
mydata$dif <- NA_real_
for (id_value in id_types) {
  rows <- which(mydata$id == id_value)
  is_visit <- mydata$transaction[rows] == 0
  visit_times <- mydata$time[rows][is_visit]
  # c(NA, visit times)[visits seen so far + 1] is the latest visit time for
  # each row of this id, or NA before its first visit.
  last_visit <- c(NA, visit_times)[cumsum(is_visit) + 1]
  # NA marks "no value" directly, so a genuine difference of 0 is preserved.
  mydata$dif[rows] <- ifelse(is_visit, NA, mydata$time[rows] - last_visit)
}

答案 4 :(得分:1)

试试这个

library(dplyr)

# Sample data: one row per event with its id, a transaction flag (0 = visit,
# 1 = transaction) and the event time.
mydata <- data.frame(
  id = c("1", "2", "1", "1", "2", "1"),
  transaction = c(0, 0, 1, 0, 1, 1),
  time = c(10, 20, 22, 27, 39, 47)
)

# dif: for each transaction row, the time elapsed since the latest earlier
# visit row (transaction == 0) with the same id; NA on visit rows and when no
# earlier visit exists. Rows are scanned in their existing order.
mydata$dif <- vapply(seq_len(nrow(mydata)), function(i) {
  if (mydata$transaction[i] == 0) {
    return(NA_real_)
  }
  earlier <- seq_len(i - 1)
  # Only earlier visits of the same id qualify; an earlier transaction of the
  # same id must not be used as the reference point.
  prior_visits <- earlier[
    mydata$id[earlier] == mydata$id[i] & mydata$transaction[earlier] == 0
  ]
  if (length(prior_visits) == 0) {
    return(NA_real_)
  }
  mydata$time[i] - mydata$time[max(prior_visits)]
}, numeric(1))

输出mydata

  id transaction time dif
1  1           0   10  NA
2  2           0   20  NA
3  1           1   22  12
4  1           0   27  NA
5  2           1   39  19
6  1           1   47  20