在传递给`geom_spoke`之前,在2D箱中汇总X,Y,theta数据

时间:2017-09-13 12:34:12

标签: r ggplot2 binning

我有一份包含 x、y 坐标和航向角的数据,我想将其划分到二维分箱(2D bin)中,以便计算每个 bin 的平均航向,并使用 ggplot 的 `geom_spoke` 进行绘图。

下面是我想要实现的效果的一个例子,其中的分箱(bin)是手动创建的(代码见下方的代码块):

geom_spoke

(图:Desired geom_spoke plot)

我知道如何创建包含每个 bin 计数数据的二维分箱,例如下文中使用 `geom_bin2d` 的热图代码:

# packages: dplyr supplies %>%, filter(), summarise() and n();
# ggplot2 supplies the plotting functions used at the end
library(dplyr)
library(ggplot2)

# data: 100 random points in a 100 x 100 square, each with an angle in
# [0, 2*pi) that is used as the heading
set.seed(1)
dat <- data.frame(x = runif(100, 0, 100),
                  y = runif(100, 0, 100),
                  angle = runif(100, 0, 2 * pi))

# manual binning: split the square into four 50 x 50 quadrants and summarise
# each quadrant at its centre point (mean angle and number of points).
# The lower/left quadrants use <= so that a point lying exactly on x == 50 or
# y == 50 is still counted (in the lower/left bin) instead of being dropped
# by every filter.
# NOTE(review): mean(angle) is a plain arithmetic mean; for headings that
# straddle the 0 / 2*pi wrap-around a circular mean may be what is wanted --
# confirm before relying on the arrow directions.
bins <- rbind(
  # bottom left
  dat %>%
    filter(x <= 50 & y <= 50) %>%
    summarise(x = 25, y = 25, angle = mean(angle), n = n()),
  # bottom right
  dat %>%
    filter(x > 50 & y <= 50) %>%
    summarise(x = 75, y = 25, angle = mean(angle), n = n()),
  # top left
  dat %>%
    filter(x <= 50 & y > 50) %>%
    summarise(x = 25, y = 75, angle = mean(angle), n = n()),
  # top right
  dat %>%
    filter(x > 50 & y > 50) %>%
    summarise(x = 75, y = 75, angle = mean(angle), n = n())
)

# plot: one point per quadrant centre plus an arrow whose direction is the
# mean angle and whose length is half the number of points in the quadrant
ggplot(bins, aes(x, y)) +
  geom_point() +
  coord_equal() +
  scale_x_continuous(limits = c(0, 100)) +
  scale_y_continuous(limits = c(0, 100)) +
  geom_spoke(aes(angle = angle, radius = n / 2),
             arrow = arrow(length = unit(0.2, "cm")))

但我似乎找不到一种方法,在把数据传递给 `geom_spoke` 之前按 bin 汇总其他变量(例如航向)。上面提到的按计数分箱的代码如下:

# heatmap of x,y counts
p <- ggplot(dat, aes(x, y)) +
  geom_bin2d(binwidth = c(50, 50)) +
  coord_equal()
#ggplot_build(p)$data[[1]] #access binned data

如果不先进行分箱,我的图看起来是这样的:(图:Bad geom_spoke plot)

2 个答案:

答案 0 :(得分:3)

这是一种方法。您只需确定一次每个维度(x 和 y)的分箱数量/范围,其余的一切都由代码处理:

# Bin edges for each axis. pretty() picks "round" break points that span the
# data; change `n` (the desired number of intervals) here to get coarser or
# finer bins.
x.range <- pretty(dat[["x"]], n = 3)
y.range <- pretty(dat[["y"]], n = 3)

> x.range
[1]   0  50 100
> y.range
[1]   0  50 100

根据每一行的 x 和 y 所落入的区间,自动将其分配到对应的 bin:

# Assign every observation to a bin along each axis and build a combined
# "xbin_ybin" label.
# findInterval() is vectorised, so no row-by-row pass is needed. With
# left.open = TRUE the bins are (lower, upper], i.e. a value belongs to the
# last edge it is strictly greater than; rightmost.closed = TRUE additionally
# puts a value lying exactly on the lowest edge into bin 1 (a strict
# `x > edge` lookup finds no edge for such a value).
# as_tibble() keeps the result a tibble, as printed below.
dat <- dat %>%
  as_tibble() %>%
  mutate(x.bin = findInterval(x, x.range,
                              left.open = TRUE, rightmost.closed = TRUE),
         y.bin = findInterval(y, y.range,
                              left.open = TRUE, rightmost.closed = TRUE),
         bin = paste(x.bin, y.bin, sep = "_"))

> head(dat)
# A tibble: 6 x 6
         x        y    angle x.bin y.bin   bin
     <dbl>    <dbl>    <dbl> <int> <int> <chr>
1 26.55087 65.47239 1.680804     1     2   1_2
2 37.21239 35.31973 1.373789     1     1   1_1
3 57.28534 27.02601 3.247130     2     1   2_1
4 90.82078 99.26841 1.689866     2     2   2_2
5 20.16819 63.34933 1.138314     1     2   1_2
6 89.83897 21.32081 3.258310     2     1   2_1

计算每个箱子的平均值:

# Attach per-bin summaries to every row: the mean position, the mean angle
# and the number of observations of the bin the row belongs to. mutate()
# (rather than summarise()) keeps one row per observation, and the final
# ungroup() drops the grouping again.
# NOTE(review): mean(angle) is a plain arithmetic mean; for headings that
# straddle the 0 / 2*pi wrap-around a circular mean may be what is wanted --
# confirm.
dat <- ungroup(
  mutate(
    group_by(dat, bin),
    x.mean = mean(x),
    y.mean = mean(y),
    angle.mean = mean(angle),
    n = n()
  )
)

> head(dat)
# A tibble: 6 x 10
         x        y    angle x.bin y.bin   bin   x.mean   y.mean angle.mean     n
     <dbl>    <dbl>    <dbl> <int> <int> <chr>    <dbl>    <dbl>      <dbl> <int>
1 26.55087 65.47239 1.680804     1     2   1_2 26.66662 68.56461   2.672454    29
2 37.21239 35.31973 1.373789     1     1   1_1 33.05887 28.86027   2.173177    23
3 57.28534 27.02601 3.247130     2     1   2_1 74.71214 24.99131   3.071629    23
4 90.82078 99.26841 1.689866     2     2   2_2 77.05622 77.91031   3.007859    25
5 20.16819 63.34933 1.138314     1     2   1_2 26.66662 68.56461   2.672454    29
6 89.83897 21.32081 3.258310     2     1   2_1 74.71214 24.99131   3.071629    23

绘图(没有硬编码任何分箱数量/分箱宽度):

# One row per bin for the point and arrow layers. Mapping those layers from
# the per-observation table would draw each bin's identical point and arrow
# once for every observation in that bin, stacked on top of each other.
bin.summary <- distinct(dat, bin, x.mean, y.mean, angle.mean, n)

ggplot(dat, aes(x, y, fill = bin)) +
  # tile size is taken from the spacing of the bin edges, not hard-coded
  geom_bin2d(binwidth = c(diff(x.range)[1],
                          diff(y.range)[1])) +
  # mean position of the observations in each bin
  geom_point(data = bin.summary, aes(x = x.mean, y = y.mean)) +
  # arrow direction = mean angle, arrow length = half the bin's count
  geom_spoke(data = bin.summary,
             aes(x = x.mean, y = y.mean, angle = angle.mean, radius = n / 2),
             arrow = arrow(length = unit(0.2, "cm"))) +
  coord_equal()

ggplot

其他细节,例如填充调色板的选择、图例标签、图标题等,可以随后再进行调整。

答案 1 :(得分:0)

为了扩展 @Z.Lin 的答案,这里做了一处修改:把点和箭头绘制在每个 bin 的中心,而不是 x、y 的平均坐标处。如果有比使用 left_join 更优雅的解决方案,欢迎告知。

# packages: dplyr supplies the pipeline verbs and left_join(); ggplot2
# supplies the plotting functions
library(dplyr)
library(ggplot2)

# data: 100 random points in a 100 x 100 square, each with an angle in
# [0, 2*pi) that is used as the heading
set.seed(1)
dat <- data.frame(x = runif(100, 0, 100),
                  y = runif(100, 0, 100),
                  angle = runif(100, 0, 2 * pi))

# set parameters
n <- 2        # number of bins along each axis
x.max <- 100  # maximum x value (upper limit passed to runif() above)
y.max <- 100  # maximum y value (upper limit passed to runif() above)

# bin edges: n equal-width bins per axis, running from 0 to the maximum
x.range <- seq(0, x.max, length.out = n + 1)
y.range <- seq(0, y.max, length.out = n + 1)

# bin data
# findInterval() is vectorised, so no row-by-row pass is needed. With
# left.open = TRUE the bins are (lower, upper]; rightmost.closed = TRUE
# additionally puts a value lying exactly on the lowest edge into bin 1
# (a strict `x > edge` lookup finds no edge for such a value).
dat <- dat %>%
  as_tibble() %>%
  mutate(x.bin = findInterval(x, x.range,
                              left.open = TRUE, rightmost.closed = TRUE),
         y.bin = findInterval(y, y.range,
                              left.open = TRUE, rightmost.closed = TRUE),
         bin = paste(x.bin, y.bin, sep = "_"))

# summarise values for each bin; mutate() keeps one row per observation so
# the raw points stay available for geom_bin2d().
# Note that the new column `n` (observations per bin) shares its name with
# the bin-count parameter `n` defined above.
# NOTE(review): mean(angle) is a plain arithmetic mean; for headings that
# straddle the 0 / 2*pi wrap-around a circular mean may be what is wanted --
# confirm.
dat <- dat %>%
  select(bin, x.bin, y.bin, x, y, angle) %>%
  group_by(bin) %>%
  mutate(angle.mean = mean(angle),
         n = n()) %>%
  ungroup()

# add x,y-coords for centre points of each bin: the midpoint between each
# pair of consecutive bin edges
x.bin.coords <- data.frame(x.bin = seq_len(n),
                           x.bin.coord = head(x.range, -1) + diff(x.range) / 2)
y.bin.coords <- data.frame(y.bin = seq_len(n),
                           y.bin.coord = head(y.range, -1) + diff(y.range) / 2)

dat <- left_join(dat, x.bin.coords, by = "x.bin")
dat <- left_join(dat, y.bin.coords, by = "y.bin")

# one row per bin for the point and arrow layers, so each bin's point and
# arrow is drawn once rather than once per observation in the bin
bin.centres <- distinct(dat, bin, x.bin.coord, y.bin.coord, angle.mean, n)

# plot: counts as tiles, plus a point at each bin centre and an arrow whose
# direction is the bin's mean angle and whose length is half its count
ggplot(data = dat, aes(x, y)) +
  geom_bin2d(binwidth = c(diff(x.range)[1], diff(y.range)[1])) +
  geom_point(data = bin.centres, aes(x = x.bin.coord, y = y.bin.coord)) +
  geom_spoke(data = bin.centres,
             aes(x = x.bin.coord, y = y.bin.coord,
                 angle = angle.mean, radius = n / 2),
             arrow = arrow(length = unit(0.2, "cm"))) +
  coord_equal()

plot