我想在 R 中创建 beeswarm 图(蜂群图),但点的大小要与第三个变量相关,效果类似这样(like this),
但要更简单,采用 R 的默认风格,不带说明文字等。我已经查看了 beeswarm
包,但它似乎没有这个选项。
这是我尝试过的代码,但圆圈会挤在一起、相互重叠:
library(beeswarm)
# Simulate two groups ("x" and "y") of ten normally distributed values each.
myData <- list(
  x = rnorm(n = 10, mean = 10, sd = 1),
  y = rnorm(n = 10, mean = 7, sd = 3)
)
df <- as.data.frame(myData)
df$size <- seq_len(10)  # third variable that drives the circle size

# Keep the object returned by beeswarm(); its x, y and col components are
# reused below to draw the size-scaled circles.
bee_info <- beeswarm(myData, col = c("red", "blue"))

# Empty canvas spanning the swarm coordinates (no axes, no labels).
with(bee_info, plot(x, y, type = "n", axes = FALSE, xlab = "", ylab = ""))

# One circle per swarm point; df$size is repeated once per group.
with(
  bee_info,
  symbols(
    x = x, y = y,
    circles = rep(df$size, 2),
    bg = adjustcolor(col, 0.5),
    xpd = TRUE, add = TRUE,
    inches = 0.5
  )
)

# Group names along the bottom, value axis on the left.
axis(side = 1, at = seq_along(myData), labels = names(myData), lwd = 0)
axis(side = 2, las = 2)
通过调整 beeswarm 中的 spacing 和 symbols 中的 inches,我可以让图形更接近我想要的效果。
但我希望圆圈排列得更紧密,并且整个过程能够自动化,因为我需要生成数百个这样的图形。
使用我在下面创建的函数,我得到了想要的结果。但它用蛮力法(逐步试探)来估计点的位置,是一种非常低效的解决方案。
library(beeswarm)
# Simulate two groups ("x" and "y") of ten normally distributed values each.
myData <- list(
  x = rnorm(n = 10, mean = 10, sd = 1),
  y = rnorm(n = 10, mean = 7, sd = 3)
)
df <- as.data.frame(myData)
df$size <- seq_len(10)  # third variable that drives the circle size

# Same swarm as before but computed with spacing = 5; the returned x, y and
# col components are reused below.
bee_info <- beeswarm(myData, col = c("red", "blue"), spacing = 5)

# Empty canvas spanning the swarm coordinates (no axes, no labels).
with(bee_info, plot(x, y, type = "n", axes = FALSE, xlab = "", ylab = ""))

# One circle per swarm point, drawn with inches = 0.2 this time.
with(
  bee_info,
  symbols(
    x = x, y = y,
    circles = rep(df$size, 2),
    bg = adjustcolor(col, 0.5),
    xpd = TRUE, add = TRUE,
    inches = 0.2
  )
)

# Group names along the bottom, value axis on the left.
axis(side = 1, at = seq_along(myData), labels = names(myData), lwd = 0)
axis(side = 2, las = 2)
以下是这些函数的定义以及如何使用它们的示例:
library(dplyr)
library(magrittr)
library(purrr)
library(sp)
library(rgeos)
library(raster)
library(truncnorm)
library(scales)
library(RColorBrewer)
library(magick)
# Adapted from
# https://stat.ethz.ch/pipermail/r-sig-geo/2012-November/016632.html
#
# Compute the vertices of a circle outline.
#   x, y: coordinates of the centre
#   r:    radius
#   lo:   number of vertices to generate
# Returns a two-column matrix without column names (x first, y second).
# The angles run from 0 to 2 * pi inclusive, so for lo > 1 the last vertex
# coincides with the first one up to floating-point error.
get_circle_coord <- function(x, y, r = 1, lo = 100) {
  angles <- seq(from = 0, to = 2 * pi, length.out = lo)
  xs <- x + r * sin(angles)
  ys <- y + r * cos(angles)
  # deparse.level = 0 keeps the result free of column names.
  cbind(xs, ys, deparse.level = 0)
}
# Build an sp `SpatialPolygons` object that contains a single circle.
#   x, y: coordinates of the centre
#   r:    radius
#   lo:   number of outline vertices (forwarded to get_circle_coord)
#   id:   ID given to the `Polygons` feature
# The outline matrix is labelled and handed to sp directly; the previous
# matrix -> data.frame -> matrix round trip produced the same matrix but
# cost time on every call, and this function is called once per step of the
# brute-force placement loop.
make_poly_circles <- function(x, y, r = 1, lo = 100, id = "x") {
  outline <- get_circle_coord(x, y, r, lo)
  colnames(outline) <- c("x", "y")
  ring <- Polygon(outline)
  feature <- Polygons(list(ring), ID = id)
  SpatialPolygons(list(feature))
}
# Build an sp `SpatialPolygons` object that contains a single axis-aligned
# rectangle.
#   xMin, xMAx: horizontal extent
#   yMin, yMax: vertical extent
#   id:         ID given to the `Polygons` feature
# The ring starts at (xMAx, yMin), visits (xMin, yMin), (xMin, yMax) and
# (xMAx, yMax), and is closed by repeating the first corner.
make_poly_rect <- function(xMin, xMAx, yMin, yMax, id = "x") {
  corners <- data.frame(
    x = c(xMAx, xMin, xMin, xMAx, xMAx),
    y = c(yMin, yMin, yMax, yMax, yMin)
  )
  ring <- Polygon(as.matrix(corners))
  feature <- Polygons(list(ring), ID = id)
  SpatialPolygons(list(feature))
}
# Area shared by two geometries.
#   shp1, shp2: objects accepted by rgeos::gIntersection
# Returns 0 when rgeos::gIntersection yields NULL, otherwise the
# rgeos::gArea of the intersection.
get_area_in_intersection <- function(shp1, shp2) {
  overlap <- rgeos::gIntersection(shp1, shp2)
  if (is.null(overlap)) {
    return(0)
  }
  rgeos::gArea(overlap)
}
# Decide towards which side of the column `x` a circle should be moved.
#   circle:        SpatialPolygons holding the circle to place
#   other_circles: SpatialPolygons holding the circles already placed
#   x:             horizontal position of the column the circle starts in
# Within the horizontal band covered by `circle` (its bounding-box y range),
# the area occupied by `other_circles` is measured left and right of `x`.
# Returns "left" or "right": the side with the smaller occupied area, or a
# random pick (one call to sample()) when both areas are equal within
# all.equal()'s tolerance.
get_side_move <- function(circle, other_circles, x) {
  y_span <- sp::bbox(circle)["y", ]
  bb_others <- sp::bbox(other_circles)
  min_x <- bb_others["x", "min"]
  max_x <- bb_others["x", "max"]
  # The original read "min" into maxY and "max" into minY, so the rectangles
  # were built with yMin > yMax (same outline, reversed vertex order).
  min_y <- y_span[["min"]]
  max_y <- y_span[["max"]]

  band_left <- make_poly_rect(xMin = min_x, xMAx = x,
                              yMin = min_y, yMax = max_y)
  band_right <- make_poly_rect(xMin = x, xMAx = max_x,
                               yMin = min_y, yMax = max_y)

  areas <- c(
    left = get_area_in_intersection(other_circles, band_left),
    right = get_area_in_intersection(other_circles, band_right)
  )

  # all.equal() returns TRUE or a character description of the difference,
  # so it must be wrapped in isTRUE() rather than compared with "TRUE".
  if (isTRUE(all.equal(areas[["left"]], areas[["right"]]))) {
    return(sample(names(areas), 1))
  }
  names(which.min(areas))
}
# Pack circles along one vertical column so that none of them overlap.
#
# y: y position of the circles
# x: initial x position of the circles (the column they start in)
# size: size of the circles (one value per element of y)
# range_y: y range used to scale the radii; radii are rescaled to
#   c(0, diff(range_y) * rescFactor)
# col: colour of the circles (a single value, recycled to every circle, or
#   a vector with one entry per element of y)
# rescFactor: factor used to rescale size (see range_y)
# lo: how many vertices each circle outline will have
# step_x: distance by which a circle is moved along the x axis on each
#   iteration of the placement loop; must be a single finite non-zero
#   number, its absolute value is used
# progress_bar: whether to show a text progress bar
#
# Circles are processed in a random order (one call to sample()), so the
# layout differs between runs. Each circle starts at `x`; while it
# intersects the circles already placed it is pushed step by step towards
# the side chosen by get_side_move().
#
# Returns a SpatialPolygonsDataFrame with one circle per row, in processing
# order. Columns: id (index of the circle in the input), col, size (rescaled
# radius), y, and x (the starting column passed in, not the final position).
make_linear_circle_packing <- function(y,
                                       x,
                                       size,
                                       range_y = range(y),
                                       col = "white",
                                       rescFactor = 0.025,
                                       lo = 100,
                                       step_x = quantile(y * rescFactor,
                                                         0.1),
                                       progress_bar = TRUE) {
  n <- length(y)
  if (n == 0L) {
    stop("`y` must contain at least one value", call. = FALSE)
  }
  if (length(size) != n) {
    stop("`size` must have the same length as `y`", call. = FALSE)
  }
  # A zero step would never separate two overlapping circles and the
  # placement loop below would spin forever.
  if (!is.numeric(step_x) || length(step_x) != 1L ||
      !is.finite(step_x) || step_x == 0) {
    stop("`step_x` must be a single finite, non-zero number", call. = FALSE)
  }
  # A negative step (e.g. the default with negative y values) used to push
  # circles towards the crowded side; only the magnitude is meaningful.
  step_x <- abs(unname(step_x))
  # Recycle so that a single colour applies to every circle; indexing a
  # length-one `col` with the shuffled indexes gave NA for all but one row.
  if (length(col) > 0L) {
    col <- rep_len(col, n)
  }

  indexes <- seq_len(n)
  # Mix the order, so we get a different plot each time
  s <- sample(indexes)
  y <- y[s]
  size <- size[s]
  size <- scales::rescale(x = size,
                          to = c(0, diff(range_y) * rescFactor))

  # txtProgressBar() requires max > min, so skip the bar for a single circle
  show_progress <- progress_bar && n > 1L
  if (show_progress) {
    pb <- txtProgressBar(min = 1, max = n, style = 3)
  }

  circle_packing <- vector("list", n)
  for (i in indexes) {
    circle <- make_poly_circles(x = x, y = y[i],
                                r = size[i],
                                lo = lo, id = i)
    if (show_progress) {
      setTxtProgressBar(pb, i)
    }
    # The first circle has nothing to overlap with and is stored as is
    if (i > 1L) {
      # Only the slots filled so far; rbind cannot handle the NULL slots
      other_circles <- do.call(rbind, circle_packing[seq_len(i - 1L)])
      if (rgeos::gIntersects(other_circles, circle)) {
        # The side is chosen once, from the starting position, and kept
        # for all following steps of this circle
        move_side <- get_side_move(circle = circle,
                                   other_circles = other_circles,
                                   x = x)
        move <- if (move_side == "left") -step_x else step_x
        new_x <- x
        repeat {
          new_x <- new_x + move
          circle <- make_poly_circles(new_x, y[i], size[i],
                                      lo = lo, id = i)
          if (!rgeos::gIntersects(other_circles, circle)) break
        }
      }
    }
    circle_packing[[i]] <- circle
  }

  # Attach the per-circle attributes; rows follow the processing order and
  # `id` maps each row back to the position in the user's input
  circle_packing <- do.call(rbind, circle_packing)
  df <- data.frame(id = s, col = col[s], size = size,
                   y = y, x = x,
                   stringsAsFactors = FALSE)
  sp::SpatialPolygonsDataFrame(circle_packing, df)
}
# Build one circle packing per class and lay the packings out side by side.
#   df:        data frame with the columns `class`, `y`, `size` and `col`
#   space_btw: gap between neighbouring classes, as a fraction of the summed
#              widths of all packings; NULL uses nClass / 2.5^nClass
#   step_x:    horizontal step forwarded to make_linear_circle_packing();
#              it is only forwarded when supplied explicitly and non-NULL,
#              otherwise that function's own default applies
# Prints a "Class ... | i of n" line per class to the console.
# Returns the row-bound packings with two extra attribute columns: `class`
# and `x` (the horizontal position of the class column).
multiples_linear_circle_packing <- function(df,
                                            space_btw = NULL,
                                            step_x = quantile(df$y, 0.001)) {
  # Must be tested before step_x is touched. The original never forwarded
  # step_x, and it also reset space_btw to NULL here, discarding the
  # caller's value.
  forward_step <- !missing(step_x) && !is.null(step_x)

  # drop = TRUE: an unused factor level would yield an empty group
  per_cl <- split(df, df$class, drop = TRUE)
  classes <- names(per_cl)
  nClass <- length(classes)
  if (nClass == 0L) {
    stop("`df` must contain at least one class", call. = FALSE)
  }
  if (is.null(space_btw)) {
    space_btw <- nClass / 2.5^nClass
  }

  circle_packings <- vector("list", length = nClass)
  for (i in seq_len(nClass)) {
    class_name <- classes[[i]]
    cat("Class", class_name, "|", i, "of", nClass, "\n")
    # Every class is packed around x = 0 and scaled against the y range of
    # the whole data set, so radii are comparable between classes
    packing_args <- list(y = per_cl[[i]]$y,
                         x = 0,
                         size = per_cl[[i]]$size,
                         col = per_cl[[i]]$col,
                         range_y = range(df$y))
    if (forward_step) {
      packing_args$step_x <- step_x
    }
    packing <- do.call(make_linear_circle_packing, packing_args)
    packing$class <- class_name
    circle_packings[[i]] <- packing
    cat("\n")
  }

  x_sizes <- vapply(circle_packings,
                    function(p) unname(diff(sp::bbox(p)["x", ])),
                    numeric(1))
  dist_btw_class <- sum(x_sizes) * space_btw

  # The first packing is not shifted and was built around x = 0, so that is
  # where its column sits (the original stored 1 here)
  circle_packings[[1]]$x <- 0
  offset <- x_sizes[1] + dist_btw_class
  for (i in seq_len(nClass)[-1]) {
    circle_packings[[i]]$x <- offset
    # The horizontal offset is passed positionally.
    # NOTE(review): the name of this argument appears to differ between
    # raster releases (`x` vs `dx`) - confirm against the installed version.
    circle_packings[[i]] <- raster::shift(circle_packings[[i]], offset)
    offset <- offset + x_sizes[i] + dist_btw_class
  }
  do.call(rbind, circle_packings)
}
# Compute the circle packings for every class in `df` and plot them.
#   df:        data frame passed to multiples_linear_circle_packing()
#   space_btw: gap between classes, passed through unchanged
#   step_x:    horizontal step, passed through only when supplied by the
#              caller
# Draws the circles filled with their `col` attribute, a value axis on the
# left and the class names below their columns.
# Returns the plotted packings.
plot_circle_packings <- function(df,
                                 space_btw = NULL,
                                 step_x = quantile(df$y, 0.001)) {
  # The original passed `space_btw` as `step_x`. When the caller gave no
  # step, leave the argument out so that the packing code keeps using its
  # own default.
  circle_packings <- if (missing(step_x)) {
    multiples_linear_circle_packing(df = df,
                                    space_btw = space_btw)
  } else {
    multiples_linear_circle_packing(df = df,
                                    space_btw = space_btw,
                                    step_x = step_x)
  }
  plot(circle_packings, col = circle_packings$col)

  # Bounding box of everything drawn, used to position axis and ticks
  bb <- bbox(circle_packings)
  ylims <- bb["y", ]
  # Put the class labels one fifth of the y extent below the lowest circle
  x_pos <- ylims[1] - diff(ylims) / 5

  # pretty() only needs the two range end points; the former
  # ylims[1]:ylims[2] built a unit-step sequence that could stop short of
  # the upper limit (or collapse to one value for a y extent below 1)
  y_at <- pretty(x = ylims, n = 5)
  axis(side = 2, at = y_at, las = 2)
  axis(side = 1, at = unique(circle_packings$x),
       labels = unique(circle_packings$class), pos = x_pos,
       lwd = 0)
  circle_packings
}
结果如下: