我的目标是快速获取数据集中每辆车的常见对手数量。例如,如果Datsun参加了马自达RX4和Merc 230,而马自达RX4也参加了Merc 230比赛,那么我们将返回' 1'对于普通对手的数量。提供了一个使用mtcars数据的示例,我在其上运行我的函数。对于如此小的数据集,它需要大约0.32秒,但对于较大的数据集,它需要相当长的时间。
#setting up sample dataset#
set.seed(44)
data(mtcars)
mtcars$car<-row.names(mtcars)
mtcars<-as.data.table(mtcars)
for(i in 1:nrow(mtcars)){
mtcars[i,count:=sample(1:5,1)]
}
expanded <- data.table(car = rep(mtcars$car, mtcars$count),
opponent = sample(mtcars$car, mtcars$count),
wins=sample(1:mtcars$count,mtcars$count))
head(expanded)
car opponent wins
1: Mazda RX4 Chrysler Imperial 1
2: Mazda RX4 Merc 280 2
3: Mazda RX4 Toyota Corolla 4
4: Mazda RX4 Merc 230 3
5: Mazda RX4 Wag Chrysler Imperial 1
6: Mazda RX4 Wag Merc 280 2
#this is the function i use now which takes a while#
commonCars<-function(carA,carB){
tA<-unique(expanded[car==carA,opponent]) #getting unique opponents of first car
tB<-unique(expanded[car==carB,opponent]) #getting unique opponents of first car's opponent
commonTeams<-tB[tB %in% tA] #getting their common opponents
return(nrow(expanded[car==carA & opponent %in% commonTeams,])) #returning the number of commons
}
ptm<-proc.time()
for(i in unique(expanded[,car])) { #looping through each individual car
for(j in unique(expanded[car==i,opponent])){ #getting the cars they raced#
expanded[car==i & opponent==j,common:=commonCars(i,j)]
}
}
proc.time()-ptm
user system elapsed
0.29 0.00 0.30
head(expanded)
car opponent wins common
1: Mazda RX4 Maserati Bora 4 3
2: Mazda RX4 Hornet 4 Drive 2 3
3: Mazda RX4 Datsun 710 3 3
4: Mazda RX4 Chrysler Imperial 1 1
5: Mazda RX4 Wag Maserati Bora 4 1
6: Mazda RX4 Wag Hornet 4 Drive 2 1