具有固定轴的散点图

时间:2018-01-25 17:25:53

标签: r ggplot2 data-visualization centering

我有一个不太常见的问题。我有几个数据集,其中包含政党以及竞选活动中的议题,它们都排列在一个二维空间中。这些数据集是多维缩放(MDS)的结果,我在 R 中用 ggplot 把它们绘制成简单的散点图。下面以英国为例说明我的意思。

这是数据集:

# Example MDS output for the UK: 10 campaign issues followed by 12
# party-by-year positions, one row per object. The object is assembled as a
# tibble-classed data frame with 22 automatic row names.
dat <- structure(
  list(
    # First MDS coordinate
    horizontal = c(
      0.204471737146378, -0.444747358560562, -0.342559009790421,
      0.83488667011261, 0.561371266841888, 0.885410964488983,
      -0.329168140888214, -0.676190733909607, -0.0879427865147591,
      -0.257560282945633, -0.487674087285995, -0.0497645996510983,
      0.542662084102631, -0.721681654453278, -0.0316252149641514,
      0.332207173109055, -0.643045961856842, 0.506858110427856,
      -0.324039697647095, -0.86803138256073, 0.837070941925049,
      0.559091985225677
    ),
    # Second MDS coordinate
    vertical = c(
      -0.14312070608139, 0.55092453956604, -0.118618287146091,
      -0.066099539399147, -0.0786356627941132, 0.476942390203476,
      -0.206086233258247, -0.338285326957703, 0.0132009144872427,
      -0.415686339139938, -0.0648649260401726, -0.205041542649269,
      0.118428349494934, 0.294788777828217, -0.375703752040863,
      0.247018560767174, 0.0151952970772982, -0.273895233869553,
      -0.278548806905746, 0.607526957988739, -0.100193984806538,
      0.34084951877594
    ),
    # Party identifier; missing (NA) for the 10 issue rows
    party_id = c(
      NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
      "cons", "lab_uk", "libdem",
      "cons", "lab_uk", "libdem",
      "cons", "lab_uk", "libdem", "ukip", "snp", "gr_uk"
    ),
    # Row type: "issue" for the first 10 rows, "party" for the remaining 12
    issue = c(
      "issue", "issue", "issue", "issue", "issue",
      "issue", "issue", "issue", "issue", "issue",
      "party", "party", "party", "party", "party", "party",
      "party", "party", "party", "party", "party", "party"
    ),
    # Label drawn next to each point
    object_n = c(
      "welfare", "ecolib", "ecoreform", "europe", "cultlib",
      "immig", "security", "defense", "edu", "infra",
      "cons_05", "lab_uk_05", "libdem_05",
      "cons_10", "lab_uk_10", "libdem_10",
      "cons_15", "lab_uk_15", "libdem_15", "ukip_15", "snp_15", "gr_uk_15"
    )
  ),
  row.names = c(NA, -22L),
  class = c("tbl_df", "tbl", "data.frame")
)

我通过以下方式调整了一些主题参数(这些设置并非必需,尤其是字体部分):

# Session-wide ggplot2 theme: theme_classic() at base size 16 with every axis
# decoration (axis lines, ticks, tick labels, titles) blanked out, and all
# text set in 10 pt Century Gothic.
theme_set(
  theme_classic(base_size = 16) +
    theme(
      text = element_text(family = "Century Gothic", size = 10),
      axis.line = element_blank(),
      axis.ticks = element_blank(),
      axis.text.x = element_blank(),
      axis.text.y = element_blank(),
      axis.title.x = element_blank(),
      axis.title.y = element_blank()
    )
)

并使用以下代码创建绘图:

# Font face per label: issue labels are drawn bold, party labels plain.
dat$ff <- ifelse(dat$issue == "issue", "bold", "plain")

# Scatter plot of the raw MDS coordinates.
# NOTE: `vertical` is mapped to the x axis and `horizontal` to the y axis here.
ggplot(dat, aes(vertical, horizontal)) +
  # Issues: large black crosses (shape 3), excluded from the legend.
  geom_point(
    data = subset(dat, issue == "issue"),
    color = "black", size = 5, shape = 3, show.legend = FALSE
  ) +
  # Parties: shape, outline colour and fill all vary by party.
  geom_point(
    data = subset(dat, issue == "party"),
    aes(shape = party_id, colour = party_id, fill = party_id),
    size = 2
  ) +
  # Non-overlapping labels for every point; text size depends on the row type
  # and the font face comes from the `ff` column computed above.
  geom_text_repel(
    aes(label = object_n, size = issue, fontface = ff),
    family = "Century Gothic", show.legend = FALSE
  ) +
  # The colour, fill and shape scales share the title "Parties".
  scale_colour_grey("Parties", start = 0, end = .6) +
  scale_fill_grey("Parties", start = 0, end = .6) +
  # Label sizes: 3.3 for "issue" rows, 2.8 for "party" rows.
  scale_size_manual("Parties", values = c(3.3, 2.8)) +
  scale_shape_manual(
    "Parties",
    values = c(15, 21, 23, 24, 25, 11, 8, 10, 12, 13, 4, 0, 1, 14, 7, 9)
  )

得到与下图类似的图形:

enter image description here

在 MDS 分析中,只有相对位置才重要,绝对坐标并不重要。因此,我想"旋转"散点图,使由点 "ecolib" 和 "welfare" 定义的轴始终构成水平维度,其中 "welfare" 在左边,"ecolib" 在右边。其余所有点都可以围绕它们自由分布。

我意识到需要对原始坐标进行变换,并以这两点之间的中点为中心。但是,我还不知道具体该怎么做。非常感谢任何帮助!

1 个答案:

答案 0 :(得分:1)

要将 welfare 和 ecolib 放到 x 轴上,先平移所有点,使这两点之间的连线穿过原点,然后将所有点旋转一个角度,该角度等于这条连线与 x 轴的夹角:

library(tidyverse)
library(ggrepel)

# Extract the welfare and ecolib rows by exact label. match() returns the
# rows in the order requested, so n[1, ] is always welfare and n[2, ] is
# always ecolib, whatever the row order of dat.
anchor_rows <- match(c("welfare", "ecolib"), dat$object_n)
if (anyNA(anchor_rows)) {
  stop("dat$object_n must contain both \"welfare\" and \"ecolib\"",
       call. = FALSE)
}
n <- dat[anchor_rows, ]

# Slope and intercept of the line between them. These are only used for the
# visual check below; the transformation itself does not need them, so it
# also works when the line is vertical (infinite slope).
slope <- diff(n$vertical) / diff(n$horizontal)
intercept <- n$vertical[2] - slope * n$horizontal[2]

# To check this, draw the line to show that we have the correct line.
# This assumes you've saved your previous plot as object p and have switched
#  horizontal and vertical to be, respectively, on the x and y axes.
# The check is skipped when no such plot object exists.
if (exists("p") && inherits(p, "ggplot") && is.finite(slope)) {
  p + geom_abline(slope = slope, intercept = intercept)
}

# Shift all points so that the midpoint between welfare and ecolib sits at the
# origin; the line connecting the two then passes through the origin.
centre <- c(mean(n$horizontal), mean(n$vertical))
centred <- cbind(dat$horizontal - centre[1], dat$vertical - centre[2])

# Direction of the vector pointing from welfare to ecolib. atan2() keeps the
# quadrant (unlike atan() of the slope, which is only defined up to a half
# turn), so after rotating by -angle ecolib ends up to the right of welfare.
angle <- atan2(diff(n$vertical), diff(n$horizontal))

# Rotate all points by -angle and add the new x and y values to the data frame.
# %*% is the matrix multiplication operator; each row of `centred` is a point
#  (x, y) and the second matrix maps it to
#  (x*cos + y*sin, -x*sin + y*cos).
rot <- centred %*% matrix(c(cos(angle), -sin(angle),
                            sin(angle), cos(angle)),
                          nrow = 2, byrow = TRUE)
dat$xnew <- rot[, 1]
dat$ynew <- rot[, 2]

现在我们已准备好绘制新的移位和旋转值:

# Scatter plot of the shifted and rotated coordinates (xnew on the x axis,
# ynew on the y axis); the layers and scales mirror the plot of the raw data.
ggplot(dat, aes(xnew, ynew)) +
  # Issues: large black crosses (shape 3), excluded from the legend.
  geom_point(
    data = subset(dat, issue == "issue"),
    color = "black", size = 5, shape = 3, show.legend = FALSE
  ) +
  # Parties: shape, outline colour and fill all vary by party.
  geom_point(
    data = subset(dat, issue == "party"),
    aes(shape = party_id, colour = party_id, fill = party_id),
    size = 2
  ) +
  # Non-overlapping labels for every point; text size depends on the row type
  # and the font face comes from the `ff` column of dat.
  geom_text_repel(
    aes(label = object_n, size = issue, fontface = ff),
    family = "Century Gothic", show.legend = FALSE
  ) +
  # The colour, fill and shape scales share the title "Parties".
  scale_colour_grey("Parties", start = 0, end = .6) +
  scale_fill_grey("Parties", start = 0, end = .6) +
  # Label sizes: 3.3 for "issue" rows, 2.8 for "party" rows.
  scale_size_manual("Parties", values = c(3.3, 2.8)) +
  scale_shape_manual(
    "Parties",
    values = c(15, 21, 23, 24, 25, 11, 8, 10, 12, 13, 4, 0, 1, 14, 7, 9)
  )

enter image description here