我正在使用 ggbiplot 函数绘制前两个主成分的双标图(biplot)。 https://github.com/vqv/ggbiplot/blob/master/R/ggbiplot.r#L171
我已经添加了一张实际图形的图片。如您所见,图中的变量名挤在一起,看起来很混乱。为了解决这个问题,我最初使用了 ggrepel,但它只能部分解决问题:ggrepel 可以让 ggbiplot 中的标签很好地相互避让,因此现在变量名不再重叠、可以辨认,但整张图仍然很难阅读。
有什么方法可以使该图更具可读性?有什么方法可以在图的右侧将变量名称整齐地显示为图例?我是否只需要用简单的代号(例如“a”、“b”、“c”等)对原始变量重新编码,并在图例中用其实际名称加以说明?我该怎么做呢?还有其他更简单、更优雅的解决方案吗?
以下是我正在使用的示例数据和代码。另外,示例数据与我的实际数据之间的区别在于:我的数据中的分组变量(“组”)是分类变量。
# Reproducible example: PCA of simulated data, drawn as a biplot with one
# ellipse per diet group.
# ggbiplot() is not part of base R, so the package must be attached first.
library(ggbiplot)

set.seed(1)

# Simulated data with 10 rows. `Diet` is built from only two sampled values;
# data.frame() recycles them to fill all 10 rows.
dat <- data.frame(
  Diet = sample(1:2),
  Outcome1 = sample(1:10),
  Outcome2 = sample(11:20),
  Outcome3 = sample(21:30),
  Response1 = sample(31:40),
  Response2 = sample(41:50),
  Response3 = sample(51:60)
)

# PCA on columns 3:5 (Outcome2, Outcome3, Response1), centred and scaled.
# NOTE(review): Outcome1, Response2 and Response3 are left out of the PCA --
# confirm that this column selection is intended.
ir.pca <- prcomp(dat[, 3:5], center = TRUE, scale. = TRUE)
summary(ir.pca)

loadings <- ir.pca$rotation
scores <- ir.pca$x
# t(loadings) has one row per component; multiplying by sdev scales row i by
# the standard deviation of component i.
correlations <- t(loadings) * ir.pca$sdev

# `dat` is already a data frame, so this is simply a copy under a second name.
dat2 <- as.data.frame(dat)

# Biplot of the first two components with one ellipse per group.
# The grouping column is stored as integers; wrapping it in factor() makes
# ggbiplot treat it as discrete groups (separate colours and legend entries)
# rather than as a continuous colour scale.
ggbiplot(
  ir.pca,
  choices = c(1, 2),
  groups = factor(dat2[, 1]),
  obs.scale = 1,
  var.scale = 1,
  ellipse = TRUE
)
编辑
原始数据集
# dput() representation of a data frame with 10 rows and 14 columns:
#   X            - integer row index, 1 to 10
#   subject      - factor with 38 levels; only 10 of them occur in these rows
#                  (presumably a subset of a larger data set -- confirm)
#   diet         - factor with levels "Ancestral" and "USDA"; every row shown
#                  here is level 1 ("Ancestral")
#   veg, fruit, meat, grains, egg, nutsandseeds, legumes, dairy, oil,
#   solidfat, sugar - numeric columns (units are not stated here)
# The expression is not assigned to a name; assign it (e.g. `dat <- ...`)
# before using it.
structure(list(X = 1:10, subject = structure(c(1L, 2L, 3L, 4L,
5L, 11L, 12L, 14L, 18L, 19L), .Label = c("100", "102", "105",
"108", "109", "113", "114", "116", "117", "119", "121", "123",
"128", "131", "136", "138", "139", "141", "144", "146", "149",
"151", "152", "153", "154", "155", "157", "160", "170", "171",
"173", "174", "176", "178", "179", "180", "181", "183"), class = "factor"),
diet = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = c("Ancestral", "USDA"), class = "factor"), veg = c(1.702913333,
1.878265667, 3.240286333, 6.186628, 2.460914333, 2.129945333,
3.715283667, 2.502908333, 2.370914333, 3.978840667), fruit = c(3.303805667,
2.906236667, 0.774577333, 1.452929333, 1.749540333, 0.669345,
2.192563, 2.824062667, 1.423301333, 0.942424333), meat = c(5.788873333,
5.001508333, 6.159671, 6.437514333, 3.539373667, 4.876655333,
10.33338333, 7.553404, 6.431676667, 4.846782667), grains = c(0.490417667,
2.626051, 0, 1.990165, 0.933450667, 0.0132, 0, 1.864535,
2.460821667, 1.553922333), egg = c(2.598418, 1.134210667,
1.000066667, 0.293333333, 1.258387667, 0.873066667, 0.710173333,
1.50172, 0.336560667, 0.386666667), nutsandseeds = c(2.134137333,
2.657383, 0.666666667, 3.805841, 2.891846, 1.1288, 8.638817,
0, 8.836688333, 1.208168667), legumes = c(0, 0, 0, 0.049405,
0, 0, 0, 0, 0, 0.0216), dairy = c(1.473881667, 0.958579333,
0, 0.781515, 0.618272, 1.0004, 0, 0.774681, 0.093069333,
2.296922), oil = c(20.14615467, 33.00514467, 6.53916, 39.29075867,
18.61577133, 15.46193, 63.83851033, 28.77015667, 55.96778833,
36.518752), solidfat = c(39.46406233, 24.855274, 4.287493333,
19.60025, 9.680668, 8.848316667, 8.328955, 30.518353, 41.01203167,
45.50694533), sugar = c(8.338315, 3.991491, 5e-05, 2.432482667,
0.221278333, 0.338247667, 4.647888, 2.99909, 6.016041667,
7.562984667)), row.names = c(NA, 10L), class = "data.frame")