我有一个看起来像这样的数据框
# Sample data: 10 random points. x and y are positions drawn from 1..9;
# vx and vy are the components of a 2D vector drawn from -1..1.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
df.data <- data.frame(
  x = sample(1:9, 10, replace = TRUE),
  y = sample(1:9, 10, replace = TRUE),
  vx = sample(-1:1, 10, replace = TRUE),
  vy = sample(-1:1, 10, replace = TRUE)
)
x和y是位置,vx和vy是二维向量的x、y分量。我想根据x和y的值对这个数据框进行分箱(“bin”),并对每个箱内的vx和vy执行计算(求和并计数)。下面的函数可以做到这一点,但它使用的循环对我的数据集来说太慢了。
# Bin the rows of `df` on a fixed 4 x 4 grid and accumulate vx, vy and a row
# count per bin, one input row at a time (the deliberately loop-based version).
#
# Args:
#   df: data frame with numeric columns x, y (positions) and vx, vy
#       (vector components).
#
# Returns:
#   A data frame with one row per grid cell (x and y each in 0..3, x varying
#   fastest) and columns x, y, vx, vy, count. vx and vy hold the per-bin sums
#   and count holds the number of input rows that fell into the bin.
#   Input rows whose bin is not on the grid are skipped.
slowWay <- function(df) {
  df.bin <- data.frame(expand.grid(x = 0:3, y = 0:3, vx = 0, vy = 0, count = 0))
  # seq_len() yields an empty sequence for a 0-row input, whereas 1:nrow(df)
  # would iterate over c(1, 0) and fail.
  for (i in seq_len(nrow(df))) {
    x.bin <- floor(df$x[i] / 3)
    y.bin <- floor(df$y[i] / 3)
    # Locate the target cell once instead of rebuilding the mask for every
    # read and write; which() also drops non-matching (and NA) comparisons.
    hit <- which(df.bin$x == x.bin & df.bin$y == y.bin)
    df.bin$vx[hit] <- df.bin$vx[hit] + df$vx[i]
    df.bin$vy[hit] <- df.bin$vy[hit] + df$vy[i]
    df.bin$count[hit] <- df.bin$count[hit] + 1
  }
  df.bin
}
这种类型的2D分箱是否可以以非循环方式进行?
答案 0 :(得分:2)
这是另一种更快捷的方法,包括未填充的bin组合:
# Vectorised 2D binning: sum vx and vy and count rows per (x, y) bin, then
# pad the result with the unpopulated cells of the 4 x 4 grid.
#
# Args:
#   df.data: data frame with numeric columns x, y (positions) and vx, vy
#            (vector components).
#
# Returns:
#   A data frame with columns x, y, vx, vy, count, sorted by y and then x,
#   with row names reset. Grid cells with no data have vx, vy and count
#   set to 0.
fasterWay <- function(df.data) {
  # aggregate() stops with "no rows to aggregate" on empty input, so return
  # the all-zero grid directly in that case.
  if (nrow(df.data) == 0L) {
    return(data.frame(expand.grid(x = 0:3, y = 0:3), vx = 0, vy = 0, count = 0))
  }

  # Bin index of every row, built once and shared by both aggregations.
  bins <- list(x = floor(df.data$x / 3), y = floor(df.data$y / 3))

  # Select the summed columns by name rather than by position so the result
  # does not depend on the column order of the input.
  sums <- aggregate(df.data[, c("vx", "vy"), drop = FALSE], bins, sum)
  counts <- aggregate(list(count = rep(NA, nrow(df.data))), bins, length)

  # all = TRUE keeps grid cells that have no data (they come back as NA) as
  # well as any populated bins that lie outside the grid.
  result <- merge(expand.grid(y = 0:3, x = 0:3), merge(sums, counts),
                  by = c("x", "y"), all = TRUE)
  result[is.na(result)] <- 0
  result <- result[order(result$y, result$x), ]
  rownames(result) <- NULL
  result
}
它给了我:
x y vx vy count
1 0 0 0 0 1
2 0 1 0 0 0
3 0 2 -1 -1 1
4 0 3 0 0 0
5 1 0 -1 -1 1
6 1 1 0 0 0
7 1 2 0 0 0
8 1 3 -1 0 2
9 2 0 -1 -1 1
10 2 1 0 0 0
11 2 2 -1 1 2
12 2 3 0 0 1
13 3 0 0 0 0
14 3 1 0 0 0
15 3 2 -1 0 1
16 3 3 0 0 0
答案 1 :(得分:1)
这是一种方法,但如果您想得到包含未填充bin组合的完整记录,则可能需要分几个步骤来完成:
# Console call: split the vx/vy columns by (x.bin, y.bin) and print a Sum/Count table for each group.
> by(df.data[, c("vx", "vy")], # input data: only the two vector-component columns
list(x.bin=floor(df.data$x / 3), y.bin=floor(df.data$y / 3)), # grouping: bin index of every row, computed as floor(position / 3)
function(df) sapply(df, function(x) c(Sum=sum(x), Count=length(x) ) ) ) # per group: sum and number of values of each column
x.bin: 0
y.bin: 1
vx vy
Sum 0 1
Count 1 1
---------------------------------------------------------------------
x.bin: 1
y.bin: 1
vx vy
Sum 0 1
Count 2 2
---------------------------------------------------------------------
x.bin: 2
y.bin: 1
vx vy
Sum -1 -2
Count 2 2
---------------------------------------------------------------------
x.bin: 0
y.bin: 2
vx vy
Sum 1 0
Count 1 1
---------------------------------------------------------------------
x.bin: 1
y.bin: 2
NULL
---------------------------------------------------------------------
x.bin: 2
y.bin: 2
vx vy
Sum 2 1
Count 4 4
答案 2 :(得分:1)
这是data.table版本:
library(data.table)

# Convert to a data.table so the binning can be expressed as a keyed join.
dt.data <- as.data.table(df.data)

# Add the bin index of every row and key the table on it.
dt.data[, c("x.bin", "y.bin") := list(floor(x / 3), floor(y / 3))]
setkey(dt.data, x.bin, y.bin)

# Cross join to create every bin combination of the 4 x 4 grid.
dt.bin <- CJ(x = 0:3, y = 0:3)

# Join the bins to the data and summarise once per bin.
# by = .EACHI is required: since data.table 1.9.4 a join no longer groups by
# each row of i implicitly, so without it j is evaluated once over the whole
# join and the result has neither one row per bin nor the x.bin/y.bin columns.
dt.data.2 <- dt.data[dt.bin,
                     list(vx = sum(vx), vy = sum(vy), count = .N),
                     by = .EACHI]

# Bins with no matching rows produce NA sums; replace them with 0.
dt.data.2[is.na(vx), vx := 0L]
dt.data.2[is.na(vy), vy := 0L]

# Display the final table ordered by y bin, then x bin.
dt.data.2[order(y.bin, x.bin)]
## x.bin y.bin vx vy count
## 1: 0 0 0 0 0
## 2: 1 0 0 0 0
## 3: 2 0 1 1 1
## 4: 3 0 0 0 0
## 5: 0 1 0 0 0
## 6: 1 1 0 -2 3
## 7: 2 1 0 0 0
## 8: 3 1 0 0 0
## 9: 0 2 0 0 1
## 10: 1 2 0 0 0
## 11: 2 2 0 2 3
## 12: 3 2 -1 1 1
## 13: 0 3 0 0 0
## 14: 1 3 0 0 0
## 15: 2 3 0 0 0
## 16: 3 3 1 -1 1