Question

我有一个数据集df，需要找出其中X和Y变量之间的相关性，并且还需要按不同位置（Location）分别查看这种相关性。我的问题是位置很多（例如95个），用ggplot()绘图时，输出非常拥挤，看不清楚。是否可以增大ggplot()的布局，让我可以向下滚动来查看各个图？由于样本太多，我无法在此处完整分享。这是我尝试的方式：

        dput(df)
structure(list(X = c(60, 72, 86, 82, 20, 43, 5, 9, 54, 53, 65, 
73, 56, 80, 38, 14, 83, 85, 6, 61, 93, 39, 31, 86, 28, 26, 45, 
17, 34, 95, 59, 81, 73, 20, 75, 77, 75, 32, 20, 67, 44, 56, 76, 
76, 15, 93, 55, 77, 93, 90, 22, 53, 23, 78, 96, 76, 20, 49, 7, 
32, 96, 20, 29, 25, 9, 59, 57, 15, 31, 27, 12, 14, 35, 71, 40, 
68, 37, 54, 25, 70, 59, 58, 5, 49, 82, 14, 16, 9, 98, 86, 30, 
38, 16, 64, 77, 27, 16, 41, 32, 57, 40, 15, 63, 36, 95, 85, 62, 
54, 67, 32, 7, 98, 15, 29, 66, 36, 26, 41, 100, 5, 18, 63, 100, 
51, 28, 6, 12, 98, 13, 67, 91, 55, 10, 30, 32, 23, 28, 41, 79, 
80, 22, 63, 23, 96, 21, 35, 38, 73, 84, 72, 17, 49, 40, 100, 
18, 9, 79, 65, 35, 80, 8, 71, 85, 60, 56, 66, 99, 73, 74, 100, 
22, 39, 78, 7, 97, 69, 68, 88, 11, 69, 43, 95, 65, 68, 78, 44, 
74, 73), Y = c(78, 61, 66, 70, 53, 28, 76, 86, 82, 19, 98, 12, 
31, 86, 59, 91, 93, 64, 78, 12, 41, 99, 66, 64, 11, 57, 13, 35, 
43, 67, 6, 72, 97, 31, 61, 48, 79, 69, 40, 44, 65, 46, 39, 75, 
27, 6, 74, 46, 71, 42, 28, 30, 83, 82, 16, 34, 49, 15, 67, 66, 
30, 26, 55, 12, 19, 47, 90, 50, 27, 56, 40, 92, 39, 63, 15, 95, 
72, 58, 12, 15, 27, 44, 16, 9, 25, 19, 21, 43, 59, 48, 7, 12, 
60, 65, 86, 53, 48, 73, 53, 70, 38, 43, 35, 18, 30, 62, 94, 61, 
49, 5, 79, 33, 73, 91, 19, 81, 24, 93, 95, 73, 70, 92, 45, 33, 
24, 20, 96, 11, 72, 51, 87, 19, 68, 47, 7, 98, 73, 27, 51, 10, 
46, 27, 95, 38, 97, 27, 13, 28, 56, 61, 58, 26, 11, 69, 5, 84, 
7, 11, 19, 55, 94, 25, 29, 32, 83, 78, 45, 29, 80, 30, 68, 91, 
63, 16, 14, 75, 63, 17, 83, 7, 47, 71, 78, 92, 87, 14, 7, 21), 
    Location = c("L1", "L2", "L3", "L4", "L5", "L6", "L7", "L8", 
    "L9", "L10", "L11", "L12", "L13", "L14", "L15", "L16", "L17", 
    "L18", "L19", "L20", "L21", "L22", "L23", "L24", "L25", "L26", 
    "L27", "L28", "L29", "L30", "L31", "L32", "L33", "L34", "L35", 
    "L36", "L37", "L38", "L39", "L40", "L41", "L42", "L43", "L44", 
    "L45", "L46", "L47", "L48", "L49", "L50", "L51", "L52", "L53", 
    "L54", "L55", "L56", "L57", "L58", "L59", "L60", "L61", "L62", 
    "L63", "L64", "L65", "L66", "L67", "L68", "L69", "L70", "L71", 
    "L72", "L73", "L74", "L75", "L76", "L77", "L78", "L79", "L80", 
    "L81", "L82", "L83", "L84", "L85", "L86", "L87", "L88", "L89", 
    "L90", "L91", "L92", "L93", "L94", "L1", "L2", "L3", "L4", 
    "L5", "L6", "L7", "L8", "L9", "L10", "L11", "L12", "L13", 
    "L14", "L15", "L16", "L17", "L18", "L19", "L20", "L21", "L22", 
    "L23", "L24", "L25", "L26", "L27", "L28", "L29", "L30", "L31", 
    "L32", "L33", "L34", "L35", "L36", "L37", "L38", "L39", "L40", 
    "L41", "L42", "L43", "L44", "L45", "L46", "L47", "L48", "L49", 
    "L50", "L51", "L52", "L53", "L54", "L55", "L56", "L57", "L58", 
    "L59", "L60", "L61", "L62", "L63", "L64", "L65", "L66", "L67", 
    "L68", "L69", "L70", "L71", "L72", "L73", "L74", "L75", "L76", 
    "L77", "L78", "L79", "L80", "L81", "L82", "L83", "L84", "L85", 
    "L86", "L87", "L88", "L89", "L90", "L91", "L92", "L93", "L94"
    )), row.names = c(NA, -188L), class = "data.frame")



# Scatter of Y against X with one panel per Location; every panel gets its
# own y-axis range and the legend is hidden.
ggplot(data = df, mapping = aes(x = X, y = Y, fill = Location)) +
  geom_point() +
  facet_wrap(~ Location, scales = "free_y") +
  theme(legend.position = "none")

结果非常拥挤，只能在一个屏幕上显示所有95个位置。

Answer 1

如果您想要的是可视化每个位置上x和y之间的相关性，我建议用一张图来显示所有这些信息。借助在此处（here，原文为链接）找到的函数，您可以计算每个组内x和y之间的相关性，并将结果存储在一个简单的数据框中。然后，您就可以按位置绘制相关性。这里生成的图将各位置按相关性从小到大排序。

# Simulated data set: 95 locations, each observed 30 times, with x and y
# drawn independently from a standard normal distribution.
df <- data.frame(
  location = rep(seq_len(95), times = 30),
  x = rnorm(95 * 30),
  y = rnorm(95 * 30)
)

require(plyr)

# Compute the correlation (cor() default, i.e. Pearson) between the `x` and
# `y` columns of a data frame and return it as a one-row data frame with a
# single column `COR`.
func <- function(df) {
  r_xy <- cor(df$x, df$y)
  data.frame(COR = r_xy)
}

# Split df by location, apply func() to each piece, and stack the results:
# one row per location with its correlation in column `COR`.
df_cor <- ddply(.data = df, .variables = "location", .fun = func)

require(ggplot2)
require(tidyverse)

# Dot plot of the per-location correlation; locations are ordered along the
# x-axis from the lowest to the highest correlation, with vertical tick labels.
ggplot(df_cor, aes(x = reorder(location, COR), y = COR)) +
  geom_point() +
  labs(x = "Location") +
  theme(axis.text.x = element_text(angle = 90))

Answer 2

从您的问题中并不能完全看出您想要什么，但如果我理解正确，您遇到的是dpi（分辨率）问题。如果将图导出并放大，您会得到更接近所需的效果。

示例：

library(tidyverse)

# Toy data: 130 points lying exactly on the line y = 2x + 5, labelled with
# the 26 lowercase letters repeated five times.
my_x <- rnorm(n = 130)
my_y <- 2 * my_x + 5
my_locations <- rep(letters, times = 5)
my_data <- tibble(
  my_x = my_x,
  my_y = my_y,
  my_locations = my_locations
)

# Faceted scatter of the toy data: one panel per location label, a free
# y-axis for each panel, and no legend.
ggplot(
  data = my_data,
  mapping = aes(x = my_x, y = my_y, fill = my_locations)
) +
  geom_point() +
  facet_wrap(~ my_locations, scales = "free_y") +
  theme(legend.position = "none")

在绘图窗口中，输出看起来非常拥挤：

但是，如果将其另存为更大的图像，则看起来很棒：

Answer 3

由于您使用的是facet_wrap，我认为您不一定需要对每个分面进行完美的并排（左右/上下）比较。当然，能这样比较是最好的，但是如果有94个这样的分面（而屏幕空间又有限），我建议您将其拆分为多个图。

# Partition the distinct locations into consecutive chunks of at most 16.
# The chunk id is the zero-based position divided by 16, so the list names
# start at "0".
subgroups <- local({
  locs <- unique(df$Location)
  chunk_id <- (seq_along(locs) - 1) %/% 16
  split(locs, chunk_id)
})
str(subgroups)
# List of 6
#  $ 0: chr [1:16] "L1" "L2" "L3" "L4" ...
#  $ 1: chr [1:16] "L17" "L18" "L19" "L20" ...
#  $ 2: chr [1:16] "L33" "L34" "L35" "L36" ...
#  $ 3: chr [1:16] "L49" "L50" "L51" "L52" ...
#  $ 4: chr [1:16] "L65" "L66" "L67" "L68" ...
#  $ 5: chr [1:14] "L81" "L82" "L83" "L84" ...

# Write one PNG per chunk of locations. Each file is named after the first
# and last location of its chunk, e.g. "subplot_L1-L16.png".
for (subgrp in subgroups) {
  rows_in_chunk <- df$Location %in% subgrp
  gg <- ggplot(df[rows_in_chunk, ], aes(x = X, y = Y, fill = Location)) +
    geom_point() +
    facet_wrap(~ Location, scales = "free_y") +
    theme(legend.position = "none")
  png_name <- sprintf("subplot_%s-%s.png", subgrp[1], subgrp[length(subgrp)])
  ggsave(filename = png_name, plot = gg)
}

从左边（全部）到右边（一次16个Location）：

（您将得到六个这样的图）。每个图中包含多少组完全由您决定……16只是一个方便的数字，可以得到大致均衡的子组。

或者...您可以使用width=和height=来增加PNG的大小：

# Alternative: keep every location in a single faceted plot and enlarge the
# saved image instead. ggsave() interprets width/height in inches by default.
gg <- ggplot(df, aes(x = X, y = Y, fill = Location)) +
  geom_point() +
  facet_wrap("Location", scales = "free_y") +
  theme(legend.position = "none")
# Name the file after the full range of locations. Reusing the loop variable
# `subgrp` here would still refer to the last chunk, so this call would
# overwrite that chunk's PNG with the full image.
all_locations <- unique(df$Location)
ggsave(
  sprintf("subplot_%s-%s.png", all_locations[1], tail(all_locations, 1)),
  gg,
  width = 16, height = 16
)

当数据集中因子水平较多时，facet_wrap的图变得拥挤

3 个答案: