绘制具有重复点的数据

时间:2017-10-20 08:40:22

标签: r plot ggplot2

请问,有没有比下面这种做法更好的方式来绘制重复数据?在这张图中,重复的点显示得并不清楚。

enter image description here

 library(ggplot2)
 # Refer to columns by bare name inside aes(): they are looked up in the
 # `output` data frame given to ggplot(), instead of being pulled in from the
 # global `output$...` vectors.
 p <- ggplot(output, aes(Longitudes, Latitudes))
 # Text layer with manually jittered coordinates.
 # NOTE(review): no `label` aesthetic is mapped here, although geom_text()
 # normally requires one -- confirm which column should be shown as text.
 p + geom_text(aes(x = jitter(Longitudes), y = jitter(Latitudes)),
               check_overlap = FALSE, size = 5)
 # Point layer using ggplot2's built-in jitter position
 p + geom_point(position = "jitter")

在特定点上显示重复的目的是显示事件。

2 个答案:

答案 0 :(得分:1)

要可视化重复点,您可以:

  • 添加抖动(例如,使用 geom_jitter)
  • 降低 alpha(例如,alpha = 0.1)
  • 减小点的大小(例如,size = 1)
  • 更改点的形状(例如,shape = 21)

代码:

library(ggplot2)

# Example data: two groups (A and B) of 1000 identical values each,
# reshaped to long format with columns `variable` and `value`.
df <- reshape2::melt(
    data.frame(
        A = rep(0, times = 1e3),
        B = rep(1, times = 1e3)
    )
)

# Jittered, semi-transparent hollow circles make the duplicates visible.
ggplot(data = df, mapping = aes(x = variable, y = value)) +
    geom_jitter(shape = 21, size = 2, alpha = 0.5) +
    theme_classic()

结果图:

enter image description here

答案 1 :(得分:0)

这实际上是我对 ggplot 的最大抱怨之一。

如此之多,以至于我写了自己的解决方案(抖动/ alpha除外)。

解决方案

从本质上讲,这是一个名为 position_bunch 的新 "position":它把落在同一个 (x, y) 位置上的点,按照指定的图案(shape,例如六边形)围绕该位置分散排布。可以像这样使用:

# Usage sketch: ggplot(...) stands in for your own data and aesthetic mapping.
# No trailing comma after the last argument: position_bunch() has exactly three
# parameters, so an extra empty argument cannot be matched to anything.
g = ggplot(...) +
  geom_point(
    position = position_bunch(
      shape = 'hex',
      width = .7,
      sort  = TRUE
    )
  )

产生类似的东西

example output

实施

 # Build a ggplot2 position that spreads points sharing the same (x, y) over a
 # small lattice centred on that location, so that duplicates stay visible.
 #
 # Args:
 #   shape: lattice pattern, one of 'hex', 'square' or 'spiral'.
 #   width: scale of the bunch; lattice offsets are multiplied by
 #          width/2/l.max, where l.max is the number of layers needed for the
 #          most crowded location.
 #   sort:  falsy -> lattice sites are used in generation order;
 #          truthy -> sites closest to the centre are used first;
 #          -1 -> within each location the chosen sites are additionally
 #          handed out farthest-first.
 # Returns:
 #   A ggproto object inheriting from Position, for use as `position = ...`.
 position_bunch = function(shape='hex',width=0.5,sort=1) {
   # Resolve the layout helpers up front so a misspelt shape fails here with a
   # clear message instead of an "object not found" error at draw time.
   layout.funs = switch(shape,
     hex    = list(n.layer = n.layer.hex,    delta = delta.hex),
     square = list(n.layer = n.layer.square, delta = delta.square),
     spiral = list(n.layer = n.layer.spiral, delta = delta.spiral),
     stop("unknown shape '",shape,"': expected 'hex', 'square' or 'spiral'",
          call.=FALSE)
   )
   n.layer.fun = layout.funs$n.layer
   delta.fun   = layout.funs$delta
   if (sort) {
     sort.fun = sorting.fun
   } else {
     # identity() accepts a single argument, so it cannot stand in for the
     # two-argument sorting.fun; use a pass-through with the same signature.
     sort.fun = function(delta,dir) delta
   }
   # Evaluate now: compute_layer runs later, when the plot is built.
   force(width)
   cols = c('x','y')
   return(ggproto('PositionBunch',Position,
     required_aes = cols,
     # NOTE(review): locations are matched on x and y only; rows in different
     # facets with equal coordinates are treated as one bunch.
     compute_layer = function(self,data,params,layout) {
       if (nrow(data) == 0) {
         return(data)
       }
       # Row indices of the points at each distinct (x, y); which() drops
       # rows whose coordinates are NA, which are therefore left untouched.
       sites = unique(data[,cols])
       members = lapply(seq_len(nrow(sites)),function(i) {
         which(data$x == sites$x[i] & data$y == sites$y[i])
       })
       n.max = max(lengths(members))
       if (n.max < 1) {
         return(data)
       }
       # One lattice, sized for the most crowded location, shared by all.
       l.max = n.layer.fun(n.max)
       delta = sort.fun(delta.fun(l.max),1)
       for (rows in members) {
         if (length(rows) == 0) {
           next
         }
         delta.i = sort.fun(delta[seq_along(rows),cols],sort) * (width/2/l.max)
         data[rows,cols] = data[rows,cols] + delta.i
       }
       return(data)
     })
   )
 }
# Order the rows of `delta` by their squared distance from the origin.
#   delta: table of offsets, one row per lattice site
#   dir:   -1 sorts farthest-first; any other value sorts nearest-first
sorting.fun = function(delta,dir) {
  sq.dist = rowSums(delta^2)
  farthest.first = (dir == -1)
  delta[order(sq.dist,decreasing=farthest.first),]
}
# -----------------------------------------------------------------------------
# hex
# Number of hexagonal layers used for a bunch of n points.
# Always at least 1 for n >= 1.
n.layer.hex = function(n) {
  root = sqrt(9 + 12 * (n - 1))
  floor(1 + (root - 3) / 6)
}
# Offsets of a hexagonal lattice: the origin followed by `layers` concentric
# hexagonal rings, in walking order.
#   layers: non-negative whole number of rings around the centre
# Returns a data.frame with columns x and y (unit lattice spacing); ring k
# contributes 6*k rows, and layers = 0 yields just the origin.
delta.hex = function(layers) {
  # Unit moves: (xh, yh) along a row, (xv, yv) diagonally to the next row.
  yv = sqrt(3)/2; yh = 0; xv = 0.5; xh = 1
  x.dirs = c(+xv,-xv,-xh,-xv,+xv,+xh)
  y.dirs = c(+yv,+yv, yh,-yv,-yv, yh)
  # Steps for one ring: each of the six directions repeated `layer` times;
  # the first step gets an extra `jump` to move out from the previous ring.
  ring.steps = function(dirs,layer,jump) {
    steps = rep(dirs,each=layer)
    steps[1] = steps[1] + jump
    steps
  }
  # seq_len() gives no rings at all for layers = 0 (1:0 would count 1, 0).
  rings = seq_len(layers)
  dx = c(0,unlist(lapply(rings,function(layer) ring.steps(x.dirs,layer,+xv))))
  dy = c(0,unlist(lapply(rings,function(layer) ring.steps(y.dirs,layer,-yv))))
  # Accumulate the steps into absolute offsets from the centre.
  data.frame(x=cumsum(dx),y=cumsum(dy))
}
# -----------------------------------------------------------------------------
# square
# Number of square layers used for a bunch of n points.
# Always at least 1 for n >= 1.
n.layer.square = function(n) {
  root = sqrt(4 + 8 * (n - 1))
  floor(1 + (root - 2) / 4)
}
# Offsets of a square lattice: the origin followed by `layers` concentric
# square rings, in walking order.
#   layers: non-negative whole number of rings around the centre
# Returns a data.frame with columns x and y (unit lattice spacing); ring k
# contributes 8*k rows, and layers = 0 yields just the origin.
delta.square = function(layers) {
  # Unit moves: (xh, yh) horizontally, (xv, yv) vertically.
  yv = 1; yh = 0; xv = 0; xh = 1
  x.dirs = c( xv,-xh, xv,+xh)
  y.dirs = c(+yv, yh,-yv, yh)
  # Steps for one ring: each of the four directions repeated 2*layer times;
  # the first step gets an extra `jump` to move out from the previous ring.
  ring.steps = function(dirs,layer,jump) {
    steps = rep(dirs,each=2*layer)
    steps[1] = steps[1] + jump
    steps
  }
  # seq_len() gives no rings at all for layers = 0 (1:0 would count 1, 0).
  rings = seq_len(layers)
  dx = c(0,unlist(lapply(rings,function(layer) ring.steps(x.dirs,layer,+xh))))
  dy = c(0,unlist(lapply(rings,function(layer) ring.steps(y.dirs,layer,-yv))))
  # Accumulate the steps into absolute offsets from the centre.
  data.frame(x=cumsum(dx),y=cumsum(dy))
}
# -----------------------------------------------------------------------------
# spiral
# Spiral constant, pi*(1+sqrt(5)) (about 10.17); n.layer.spiral divides the
# point count by it to get the number of layers.
f.spiral = (1 + sqrt(5)) * pi
# Number of spiral layers used for a bunch of n points.
n.layer.spiral = function(n) {
  turns = n / f.spiral
  ceiling(turns)
}
# Offsets for the spiral layout.
#   layers: number of layers (as returned by n.layer.spiral)
# Returns a data.frame with columns x and y holding
# ceiling(layers*f.spiral) + 1 rows; the first row is the origin.
delta.spiral = function(layers){
  last  = ceiling(layers*f.spiral)
  k     = 0:last
  # Radius grows with the square root of the index; the angle advances by a
  # constant pi*(1+sqrt(5)) per point.
  radius = layers/2*sqrt(k/layers)
  angle  = pi*(1+sqrt(5))*k
  # The per-point vectors are accumulated, i.e. each row is the running sum
  # of all vectors up to that index.
  data.frame(
    x = cumsum(radius * cos(angle)),
    y = cumsum(radius * sin(angle))
  )
}

测试代码

library('ggplot2')
library('gridExtra')
library('viridis')
source('ggpositions.r')
set.seed(1234)

# Draw one panel per (sample size, lattice shape) combination and save them
# together as a single image.
g.list = list()
for (N in c(10,100,500)){
  data = data.frame(
    # Explicit levels: with few samples one of the values 1..3 may not be
    # drawn at all, and factor() would then reject the three labels.
    x = factor(floor(runif(N,1,3+1)),levels=1:3,labels=c('A','B','C')),
    y = factor(floor(runif(N,1,3+1))),
    # Colour values in decreasing order
    z = rev(sort(runif(N,1,N)))
  )
  for (shape in c('hex','square','spiral')){
    g = ggplot(data,aes(x=x,y=y,color=z)) +
      geom_point(position=position_bunch(
        shape = shape,
        width = .7
      ),size=sqrt(2)/log10(N)) +  # smaller points for larger samples
      scale_color_viridis() +
      xlab(NULL) + ylab(NULL) +
      theme(legend.position='none')
    g.list[[length(g.list)+1]] = g
  }
}
G = do.call(arrangeGrob,g.list)
ggsave('test.png',G)

注释

  • 这是一项正在进行的工作-欢迎反馈!
  • 到目前为止,我仅使用 geom_point 和 aes(x = , y = ) 进行了测试
  • 点的大小很难可靠地自动缩放,因此您可能需要手动调整
  • 清洁和测试后,我打算上传到ggplot2 extensions library