我的数据如下:
> dput(head(updata[,c(1,4,5,6)],10))
structure(list(vehicle = c(2L, 5L, 8L, 9L, 10L, 12L, 13L, 14L,
18L, 20L), class = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L),
velocity = c(42.48, 39.81, 40.63, 47.11, 41.2, 38.92, 35.38,
38.62, 38.94, 43.24), lane = c(2L, 4L, 4L, 3L, 1L, 1L, 2L,
5L, 5L, 3L)), .Names = c("vehicle", "class", "velocity",
"lane"), row.names = c(32L, 707L, 1392L, 1772L, 2125L, 2501L,
2896L, 3262L, 3726L, 3941L), class = "data.frame")
我想按 'class' 和 'lane' 分组,求 'velocity' 的累积分布。为此我写了下面两个函数,并配合 ddply 使用:
# Scaled histogram bin edges of `x`.
#
# x: numeric vector (velocities in the surrounding example).
# Returns the break points that hist() chooses for `x`, multiplied by 0.68
# and rounded to one decimal place.
# NOTE(review): 0.68 is presumably a unit conversion to mph, going by the
# original variable name `speedmph` -- confirm.
speedu <- function(x) {
  # plot = FALSE: only the bin edges are needed, so do not draw a histogram
  # on every call (this function is invoked once per group).
  hi <- hist(x, plot = FALSE)
  round(hi$breaks * 0.68, 1)
}
# Cumulative share of observations at each histogram bin edge of `x`.
#
# x: numeric vector.
# Returns a numeric vector with one entry per hist() break: a leading 0,
# followed by the running proportion of observations up to the upper edge of
# each bin, rounded to two decimals.
cumdistu <- function(x) {
  # plot = FALSE: only the bin counts are needed, so do not draw a histogram
  # on every call (this function is invoked once per group).
  hi <- hist(x, plot = FALSE)
  c(0, round(cumsum(hi$counts) / sum(hi$counts), digits = 2))
}
# For every lane/class group, pair the scaled bin edges of `velocity` with the
# cumulative proportion at each edge. Both helpers bin the same data with
# hist(), so the two columns have the same length within a group.
spdistu <- ddply(
  updata, c("lane", "class"), summarise,
  speed = speedu(velocity),
  cprob = cumdistu(velocity) # was `cumdist`, which is not defined here
)
其实我并不需要两个函数,一个函数就足以同时求出速度的分组断点(breaks)和累积概率。问题在于,函数不能直接给出两个输出,只会返回最后一个变量。我试过在函数中返回 list(speedmph, prob),但这样输出会把所有速度值都放在同一行里。按上面的写法我可以得到下面的输出,但想知道有没有更简单的方法达到同样的目的:
> dput(spdistu)
structure(list(lane = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L), class = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L),
speed = c(6.8, 13.6, 20.4, 27.2, 34, 40.8, 0, 3.4, 6.8, 10.2,
13.6, 17, 20.4, 23.8, 27.2, 30.6, 34, 37.4, 27.2, 34, 13.6,
20.4, 27.2, 34, 40.8, 3.4, 6.8, 10.2, 13.6, 17, 20.4, 23.8,
27.2, 30.6, 34, 37.4, 20.4, 21.8, 23.1, 24.5, 6.8, 10.2,
13.6, 17, 20.4, 23.8, 27.2, 30.6, 34, 17.7, 19, 20.4, 21.8,
23.1, 24.5, 25.8, 27.2, 28.6, 23.8, 24.5, 25.2, 25.8, 26.5,
27.2, 0, 3.4, 6.8, 10.2, 13.6, 17, 20.4, 23.8, 27.2, 30.6,
34, 37.4, 13.6, 17, 20.4, 23.8, 27.2, 30.6, 23.1, 24.5, 25.8,
27.2, 28.6, 29.9, 0, 3.4, 6.8, 10.2, 13.6, 17, 20.4, 23.8,
27.2, 30.6, 34, 37.4, 40.8, 13.6, 17, 20.4, 23.8, 27.2, 30.6
), cprob = c(0, 0.14, 0.29, 0.71, 0.86, 1, 0, 0.01, 0.04,
0.08, 0.14, 0.22, 0.32, 0.5, 0.74, 0.95, 0.99, 1, 0, 1, 0,
0.17, 0.67, 0.83, 1, 0, 0, 0.03, 0.07, 0.16, 0.3, 0.51, 0.8,
0.99, 1, 1, 0, 0.5, 0.5, 1, 0, 0.03, 0.05, 0.11, 0.21, 0.49,
0.85, 0.99, 1, 0, 0.07, 0.07, 0.27, 0.47, 0.67, 0.73, 0.93,
1, 0, 0.25, 0.25, 0.25, 0.75, 1, 0, 0, 0.01, 0.01, 0.06,
0.1, 0.17, 0.4, 0.75, 0.96, 1, 1, 0, 0.09, 0.09, 0.52, 0.91,
1, 0, 0.29, 0.43, 0.57, 0.57, 1, 0, 0, 0.01, 0.01, 0.02,
0.04, 0.06, 0.26, 0.66, 0.95, 1, 1, 1, 0, 0.07, 0.21, 0.43,
0.93, 1)), .Names = c("lane", "class", "speed", "cprob"), row.names = c(NA,
-107L), class = "data.frame")
答案 0 :(得分:0)
你要找的是不是类似下面这样的写法:
# Scaled bin edges and cumulative proportions of `x` from a single histogram.
#
# x: numeric vector.
# Returns a two-column numeric matrix with one row per hist() break:
#   speedmph - the break points multiplied by 0.68, rounded to one decimal
#   prob     - 0 for the first break, then the running proportion of
#              observations up to each bin's upper edge, rounded to two
#              decimals
# NOTE(review): 0.68 is presumably a unit conversion to mph, going by the
# column name -- confirm.
f <- function(x) {
  # plot = FALSE: only breaks and counts are needed, so do not draw a
  # histogram on every call.
  hi <- hist(x, plot = FALSE)
  speedmph <- round(hi$breaks * 0.68, 1)
  prob <- c(0, round(cumsum(hi$counts) / sum(hi$counts), digits = 2))
  cbind(speedmph, prob)
}
# Apply f() to `velocity` within each lane/class group and store the result
# under `speed`.
spdistu <- ddply(
  .data = updata,
  .variables = c("lane", "class"),
  .fun = summarise,
  speed = f(velocity)
)