我有一个数据集 df，需要找出其中变量 X 和 Y 之间的相关性，并且要按不同的位置（Location）分别查看。我的问题是位置很多（例如 95 个），用 ggplot() 绘图时，输出非常拥挤，看不清楚。能否增大 ggplot() 的版面，让我可以向下滚动来查看各个图？由于样本太多，我无法在此处分享。下面是数据和我尝试的方式：
dput(df)
structure(list(X = c(60, 72, 86, 82, 20, 43, 5, 9, 54, 53, 65,
73, 56, 80, 38, 14, 83, 85, 6, 61, 93, 39, 31, 86, 28, 26, 45,
17, 34, 95, 59, 81, 73, 20, 75, 77, 75, 32, 20, 67, 44, 56, 76,
76, 15, 93, 55, 77, 93, 90, 22, 53, 23, 78, 96, 76, 20, 49, 7,
32, 96, 20, 29, 25, 9, 59, 57, 15, 31, 27, 12, 14, 35, 71, 40,
68, 37, 54, 25, 70, 59, 58, 5, 49, 82, 14, 16, 9, 98, 86, 30,
38, 16, 64, 77, 27, 16, 41, 32, 57, 40, 15, 63, 36, 95, 85, 62,
54, 67, 32, 7, 98, 15, 29, 66, 36, 26, 41, 100, 5, 18, 63, 100,
51, 28, 6, 12, 98, 13, 67, 91, 55, 10, 30, 32, 23, 28, 41, 79,
80, 22, 63, 23, 96, 21, 35, 38, 73, 84, 72, 17, 49, 40, 100,
18, 9, 79, 65, 35, 80, 8, 71, 85, 60, 56, 66, 99, 73, 74, 100,
22, 39, 78, 7, 97, 69, 68, 88, 11, 69, 43, 95, 65, 68, 78, 44,
74, 73), Y = c(78, 61, 66, 70, 53, 28, 76, 86, 82, 19, 98, 12,
31, 86, 59, 91, 93, 64, 78, 12, 41, 99, 66, 64, 11, 57, 13, 35,
43, 67, 6, 72, 97, 31, 61, 48, 79, 69, 40, 44, 65, 46, 39, 75,
27, 6, 74, 46, 71, 42, 28, 30, 83, 82, 16, 34, 49, 15, 67, 66,
30, 26, 55, 12, 19, 47, 90, 50, 27, 56, 40, 92, 39, 63, 15, 95,
72, 58, 12, 15, 27, 44, 16, 9, 25, 19, 21, 43, 59, 48, 7, 12,
60, 65, 86, 53, 48, 73, 53, 70, 38, 43, 35, 18, 30, 62, 94, 61,
49, 5, 79, 33, 73, 91, 19, 81, 24, 93, 95, 73, 70, 92, 45, 33,
24, 20, 96, 11, 72, 51, 87, 19, 68, 47, 7, 98, 73, 27, 51, 10,
46, 27, 95, 38, 97, 27, 13, 28, 56, 61, 58, 26, 11, 69, 5, 84,
7, 11, 19, 55, 94, 25, 29, 32, 83, 78, 45, 29, 80, 30, 68, 91,
63, 16, 14, 75, 63, 17, 83, 7, 47, 71, 78, 92, 87, 14, 7, 21),
Location = c("L1", "L2", "L3", "L4", "L5", "L6", "L7", "L8",
"L9", "L10", "L11", "L12", "L13", "L14", "L15", "L16", "L17",
"L18", "L19", "L20", "L21", "L22", "L23", "L24", "L25", "L26",
"L27", "L28", "L29", "L30", "L31", "L32", "L33", "L34", "L35",
"L36", "L37", "L38", "L39", "L40", "L41", "L42", "L43", "L44",
"L45", "L46", "L47", "L48", "L49", "L50", "L51", "L52", "L53",
"L54", "L55", "L56", "L57", "L58", "L59", "L60", "L61", "L62",
"L63", "L64", "L65", "L66", "L67", "L68", "L69", "L70", "L71",
"L72", "L73", "L74", "L75", "L76", "L77", "L78", "L79", "L80",
"L81", "L82", "L83", "L84", "L85", "L86", "L87", "L88", "L89",
"L90", "L91", "L92", "L93", "L94", "L1", "L2", "L3", "L4",
"L5", "L6", "L7", "L8", "L9", "L10", "L11", "L12", "L13",
"L14", "L15", "L16", "L17", "L18", "L19", "L20", "L21", "L22",
"L23", "L24", "L25", "L26", "L27", "L28", "L29", "L30", "L31",
"L32", "L33", "L34", "L35", "L36", "L37", "L38", "L39", "L40",
"L41", "L42", "L43", "L44", "L45", "L46", "L47", "L48", "L49",
"L50", "L51", "L52", "L53", "L54", "L55", "L56", "L57", "L58",
"L59", "L60", "L61", "L62", "L63", "L64", "L65", "L66", "L67",
"L68", "L69", "L70", "L71", "L72", "L73", "L74", "L75", "L76",
"L77", "L78", "L79", "L80", "L81", "L82", "L83", "L84", "L85",
"L86", "L87", "L88", "L89", "L90", "L91", "L92", "L93", "L94"
)), row.names = c(NA, -188L), class = "data.frame")
# Scatter plot of Y against X with one facet per Location; scales = "free_y"
# lets every facet use its own y-axis range.
# `colour` is mapped instead of `fill`: geom_point()'s default shape has no
# fill, so a `fill` mapping would be silently ignored.
ggplot(df, aes(x = X, y = Y, colour = Location)) +
  geom_point() +
  facet_wrap("Location", scales = "free_y") +
  # Each facet strip already names its location, so the legend is hidden.
  theme(legend.position = "none")
结果非常拥挤：全部 95 个位置都挤在同一屏上显示。
答案 0 :(得分:1)
如果您需要的是可视化每个位置上 x 和 y 之间的相关性，我建议只画一张图来展示所有这些信息。借助 here 链接中介绍的函数（下面的代码使用 plyr 包的 ddply()），您可以按组计算 x 和 y 之间的相关系数，并把结果存放在一个简单的数据框中，然后按位置绘制相关系数。下面生成的图按相关系数从小到大对位置进行排序。
# Simulated example data: 95 locations, each observed 30 times (2850 rows),
# with x and y drawn independently from rnorm().
df <- data.frame(
  location = rep(seq_len(95), times = 30),
  x = rnorm(95 * 30),
  y = rnorm(95 * 30)
)
# Load plyr, which provides ddply() and .() used further down. library()
# stops with an error if the package is missing, whereas require() would only
# warn and return FALSE, deferring the failure to the first ddply() call.
library(plyr)
# Per-group correlation helper.
#
# df: a data frame holding one group's rows, with numeric columns `x` and `y`.
# Returns a one-row data frame whose single column `COR` is cor(df$x, df$y),
# computed with cor()'s default settings.
func <- function(df) {
  xy_cor <- cor(df$x, df$y)
  data.frame(COR = xy_cor)
}
# Apply func() to each location's rows; the result has one row per location
# with the columns `location` and `COR`.
df_cor <- ddply(df, .(location), func)

# library() fails loudly when a package is missing; require() would only warn
# and return FALSE.
library(ggplot2)
library(tidyverse)

# Plot one point per location, with the locations ordered along the x-axis
# from the lowest to the highest correlation.
df_cor %>%
  ggplot(aes(x = reorder(location, COR), y = COR)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 90)) +  # vertical tick labels
  xlab("Location")
答案 1 :(得分:0)
从您的问题中不能完全确定您想要什么，但如果我理解正确，您遇到的其实是分辨率（dpi）问题。如果把图导出后再放大查看，应该就能得到更接近您想要的效果。
示例:
library(tidyverse)

# Toy data: 130 points on the line y = 2x + 5, spread over 26 locations
# (the letters a-z, five points each).
my_x <- rnorm(130)
my_y <- (my_x * 2) + 5
my_locations <- rep(letters, 5)
my_data <- tibble(my_x, my_y, my_locations)

# One facet per location. `colour` is mapped rather than `fill` because
# geom_point()'s default shape ignores `fill`.
ggplot(my_data, aes(x = my_x, y = my_y, colour = my_locations)) +
  geom_point() +
  facet_wrap(facets = "my_locations", scales = "free_y") +
  theme(legend.position = "none")
答案 2 :(得分:0)
由于您使用的是 facet_wrap，我认为您并不一定需要对每个分面做严格的并排（或上下）比较。能那样当然很好，但是在有 94 个分面（而屏幕空间有限）的情况下，我建议把它拆分成多张图。
# Partition the distinct locations into consecutive chunks of at most 16:
# integer-dividing each zero-based position by 16 yields the chunk id that
# split() groups by.
subgroups <- local({
  locations <- unique(df$Location)
  chunk_id <- (seq_along(locations) - 1) %/% 16
  split(locations, chunk_id)
})
str(subgroups)
# List of 6
#  $ 0: chr [1:16] "L1" "L2" "L3" "L4" ...
#  $ 1: chr [1:16] "L17" "L18" "L19" "L20" ...
#  $ 2: chr [1:16] "L33" "L34" "L35" "L36" ...
#  $ 3: chr [1:16] "L49" "L50" "L51" "L52" ...
#  $ 4: chr [1:16] "L65" "L66" "L67" "L68" ...
#  $ 5: chr [1:14] "L81" "L82" "L83" "L84" ...
# Draw and save one faceted plot per subgroup of locations.
for (subgrp in subgroups) {
  # Keep only the rows whose Location belongs to the current subgroup.
  subgrp_rows <- df[df$Location %in% subgrp, ]
  # `colour` is mapped rather than `fill`: geom_point()'s default shape
  # ignores `fill`.
  gg <- ggplot(subgrp_rows, aes(x = X, y = Y, colour = Location)) +
    geom_point() +
    facet_wrap("Location", scales = "free_y") +
    theme(legend.position = "none")
  # The file name carries the first and last location of the subgroup,
  # e.g. "subplot_L1-L16.png".
  ggsave(sprintf("subplot_%s-%s.png", subgrp[1], tail(subgrp, 1)), gg)
}
下图从左（全部位置）到右（每次 16 个 Location）：
（您会得到六张这样的图。）每张图中包含多少个位置完全由您决定……16 只是一个方便的数字，能让各子组的大小大致均衡。
或者……您可以使用 width= 和 height= 参数来增大 PNG 的尺寸：
# Full image: every location in a single faceted plot (no grouping), saved on
# a larger canvas so the facets have room. ggsave() interprets width/height
# in inches by default.
gg <- ggplot(df, aes(x = X, y = Y, colour = Location)) +
  geom_point() +
  facet_wrap("Location", scales = "free_y") +
  theme(legend.position = "none")
# Use a fixed file name: at this point `subgrp` is only the leftover loop
# variable, so reusing the "subplot_%s-%s.png" pattern would overwrite the
# file of the last subgroup.
ggsave("all_locations.png", gg, width = 16, height = 16)