很抱歉,标题看起来很混乱。在以下数据中,
# Sample data: one row per event.
mydata <- data.frame(
  id = c("1", "2", "1", "1", "2", "1"),
  transaction = c(0, 0, 1, 0, 1, 1), # 0 = visit, 1 = transaction
  time = c(10, 20, 22, 27, 39, 47)
)
数据中既有访问记录(transaction = 0),也有交易记录(transaction = 1)。我想计算每个 id 的每笔交易与该 id 此前最后一次访问之间的时间差:例如,id 1 在时间 22 发生了一笔交易,而他此前最后一次访问是在时间 10,因此相差 12 个单位。这是我的数据:
id transaction time
1 1 0 10
2 2 0 20
3 1 1 22
4 1 0 27
5 2 1 39
6 1 1 47
我希望得到以下结果:
id transaction time dif
1 1 0 10 NA
2 2 0 20 NA
3 1 1 22 12
4 1 0 27 NA
5 2 1 39 19
6 1 1 47 20
我尝试了类似的东西,这显然是错误的
library(dplyr)
# NOTE(review): this is the asker's non-working attempt, kept for reference.
# It computes ONE value per id (first transaction time minus last visit time)
# and mutate() recycles that single value to every row of the group, so it
# cannot produce a separate difference for each transaction.
mydata %>%
  group_by(id) %>%
  mutate(dif = first(time[transaction == 1]) - last(time[transaction == 0]))
答案 0 :(得分:1)
# For every transaction row (transaction != 0), `time1` is the time elapsed
# since the most recent visit (transaction == 0) of the same id; visit rows and
# transactions with no earlier visit get NA.
# `visit_row` is a temporary column holding, for each row, the within-id
# position of the latest visit seen so far (NA when there is none yet).
# Subtracting time[1] instead would only be right while the first row of each
# id is its only visit.
# Rows are assumed to be in chronological order within each id.
df %>%
  group_by(id) %>%
  mutate(
    visit_row = cummax(ifelse(transaction == 0, row_number(), 0L)),
    visit_row = replace(visit_row, visit_row == 0L, NA),
    time1 = ifelse(transaction != 0, time - time[visit_row], NA),
    visit_row = NULL
  )
# id transaction time time1
# <int> <int> <int> <int>
# 1 1 0 10 NA
# 2 2 0 20 NA
# 3 1 1 22 12
# 4 1 0 27 NA
# 5 2 1 39 19
# 6 1 0 47 NA
答案 1 :(得分:1)
我们可以使用 data.table,通过引用赋值(`:=`)来避免任何复制:
library(data.table)
# First attempt, kept for reference: the answer text that follows reports that
# it produced a mismatched value and supplies a corrected version.
# Per id, subtract the visit times (transaction == 0) from the transaction
# times (transaction != 0) and assign the result to `dif` by reference with
# `:=`; then set `dif` to NA on the visit rows. The trailing `[]` prints the
# updated table.
# NOTE(review): the subtraction pairs values purely by position, so it
# presumably only lines up when each id has exactly matching visits and
# transactions - confirm before reusing.
setDT(mydata)[, dif:=time[transaction!=0]- time[transaction==0],
by = id][transaction==0, dif:= NA][]
之前有一个值不匹配。以下代码修复了它
# Corrected version: difference between each transaction and the LAST visit
# that precedes it, computed per id.
#
# Step 1 - `ind` numbers the "visit(s) then transaction(s)" runs inside each
#   id: diff(transaction == 1) is negative exactly where a transaction row is
#   followed by a visit row, so the cumulative sum starts a new run there.
# Step 2 - inside each (id, ind) run, every transaction row gets its time minus
#   the time of the run's last visit; visit rows (and transactions in a run
#   with no visit) get NA. Picking the last visit explicitly keeps this correct
#   when a run holds several visits or several transactions, where a plain
#   time[transaction == 1] - time[transaction == 0] would mis-pair values or
#   fail on the length mismatch.
# Step 3 - drop the helper column; the trailing `[]` prints the result.
#
# Rows are assumed to be in chronological order within each id.
setDT(mydata)[
  , ind := cumsum(c(TRUE, diff(transaction == 1) < 0)), by = id
][
  , dif := {
    visit_times <- time[transaction == 0]
    last_visit <- if (length(visit_times) > 0) {
      visit_times[length(visit_times)]
    } else {
      NA_real_
    }
    # as.numeric() keeps the column type identical across groups.
    as.numeric(ifelse(transaction == 1, time - last_visit, NA))
  },
  by = .(id, ind)
][
  , ind := NULL
][]
# id transaction time dif
#1: 1 0 10 NA
#2: 2 0 20 NA
#3: 1 1 22 12
#4: 1 0 27 NA
#5: 2 1 39 19
#6: 1 1 47 20
答案 2 :(得分:1)
使用 base R(假设每个 id 至少有一条 transaction = 0 的观测和一条 transaction = 1 的观测)
# For each transaction row, `dif` is the time elapsed since the most recent
# visit (transaction == 0) of the same id; visit rows, and transactions with no
# earlier visit, get NA. Rows are assumed to be in chronological order.
#
# ave() hands FUN the row numbers of one id at a time and writes the results
# back to those same rows, so values stay aligned with their rows even when
# the ids are interleaved in the data.
mydata$dif <- ave(
  seq_len(nrow(mydata)),
  mydata$id,
  FUN = function(rows) {
    is_visit <- mydata$transaction[rows] == 0
    times <- mydata$time[rows]
    # Position (within this id) of the latest visit seen so far; NA if none.
    visit_pos <- cummax(ifelse(is_visit, seq_along(rows), 0L))
    visit_pos[visit_pos == 0L] <- NA
    ifelse(is_visit, NA_real_, times - times[visit_pos])
  }
)
id transaction time dif
1 1 0 10 NA
2 2 0 20 NA
3 1 1 22 12
4 1 0 27 NA
5 2 1 39 19
6 1 0 47 NA
答案 3 :(得分:1)
这也是一个选项 -
mydata <- data.frame(
  id = c("1", "2", "1", "1", "2", "1"),
  transaction = c(0, 0, 1, 0, 1, 1),
  time = c(10, 20, 22, 27, 39, 47)
)

# Distinct ids, processed one at a time below.
id_types <- levels(factor(mydata$id))

# Start from NA so that only transaction rows with an earlier visit receive a
# value (a genuine difference of 0 is kept rather than being turned into NA).
mydata$dif <- rep(NA_real_, nrow(mydata))

# For each id, give every transaction row (transaction != 0) its time minus the
# time of that id's most recent visit (transaction == 0). The `transaction`
# column decides which rows are visits, so ids whose rows do not strictly
# alternate visit/transaction are handled too. Rows are assumed to be in
# chronological order. A plain loop updates `mydata` directly, so no `<<-` is
# needed.
for (id_value in id_types) {
  rows <- which(mydata$id == id_value)
  is_visit <- mydata$transaction[rows] == 0
  times <- mydata$time[rows]
  # Position (within this id) of the latest visit seen so far; NA if none yet.
  visit_pos <- cummax(ifelse(is_visit, seq_along(rows), 0L))
  visit_pos[visit_pos == 0L] <- NA
  mydata$dif[rows[!is_visit]] <- (times - times[visit_pos])[!is_visit]
}
答案 4 :(得分:1)
试试这个
library(dplyr)
mydata <- data.frame(
  id = c("1", "2", "1", "1", "2", "1"),
  transaction = c(0, 0, 1, 0, 1, 1),
  time = c(10, 20, 22, 27, 39, 47)
)

# Row-by-row version: for each transaction row, look back over the rows above
# it, keep the visits (transaction == 0) of the same id, and subtract the time
# of the last one. Visit rows, and transactions with no earlier visit, get NA.
# ids are compared directly (no numeric conversion), so non-numeric ids work.
# Each row rescans the rows above it, which is fine for small data only.
mydata$dif <- vapply(
  seq_len(nrow(mydata)),
  function(i) {
    if (mydata$transaction[i] == 0) {
      return(NA_real_)
    }
    earlier <- seq_len(i - 1)
    prior_visits <- mydata$time[earlier][
      mydata$id[earlier] == mydata$id[i] & mydata$transaction[earlier] == 0
    ]
    if (length(prior_visits) == 0) {
      return(NA_real_)
    }
    mydata$time[i] - prior_visits[length(prior_visits)]
  },
  numeric(1)
)
输出mydata
id transaction time dif
1 1 0 10 NA
2 2 0 20 NA
3 1 1 22 12
4 1 0 27 NA
5 2 1 39 19
6 1 1 47 20