用R中的ggplot将每个点连接到其最近的点

时间:2019-10-06 15:24:34

标签: r ggplot2

我正在寻找一种使用R中的ggplot连接某些点的方法。我想将每个点连接到最近的点。这就是我的数据作为散点图的样子。

# x coordinates of the example points; element i pairs with element i of `y`.
x <- c(0.81,0.82,0.82,0.82,0.83,0.83,0.83,0.84,0.84,0.84,0.85,0.85,0.85,0.86,0.86,0.86,0.87,0.87,0.87,0.88,0.88,0.88,0.89,0.89,0.89,0.9,0.9,0.9,0.91,0.91,0.91,0.92,0.92,0.92,0.93,0.93,0.93,0.93,0.93,0.94,0.94,0.94,0.94,0.94,0.95,0.95,0.95,0.95,0.95,0.96,0.96,0.96,0.96,0.96,0.97,0.97,0.97,0.97,0.97,0.98,0.98,0.98,0.98,0.98,0.99,0.99,0.99,0.99,1,1,1,1,1.01,1.01,1.01,1.01,1.02,1.02,1.02,1.02,1.03,1.03,1.03,1.03,1.04,1.04,1.04,1.04,1.05,1.05,1.05,1.05,1.06,1.06,1.06,1.06,1.07,1.07,1.07,1.07,1.08,1.08,1.08,1.08,1.09,1.09,1.09,1.09,1.1,1.1,1.1,1.1,1.11,1.11,1.11,1.11,1.12,1.12,1.12,1.12,1.13,1.13,1.13,1.13,1.14,1.14,1.15,1.15,1.16,1.16,1.17,1.17,1.18,1.18,1.19,1.19,1.2,1.2,1.21,1.21,1.22,1.22,1.23,1.23,1.24,1.24,1.25,1.25,1.26,1.26,1.27)

# y coordinates of the example points; element i pairs with element i of `x`.
y <- c(-1.295,-0.535,-1.575,-1.295,-0.525,-1.575,-1.295,-0.515,-1.575,-1.285,-0.515,-1.575,-1.285,-0.505,-1.575,-1.275,-0.495,-1.575,-1.275,-0.485,-1.575,-1.265,-0.485,-1.575,-1.265,-0.475,-1.575,-1.255,-0.465,-1.575,-1.255,-0.455,-1.575,-1.245,-0.445,1.285,1.545,-1.575,-1.245,-0.435,1.165,1.675,-1.575,-1.235,-0.425,1.085,1.765,-1.575,-1.235,-0.405,1.015,1.845,-1.575,-1.225,-0.395,0.965,1.905,-1.575,-1.215,-0.385,0.915,1.965,-1.575,-1.215,-0.375,0.865,-1.575,-1.205,-0.355,0.825,-1.575,-1.205,-0.345,0.785,-1.565,-1.195,-0.325,0.745,-1.565,-1.185,-0.305,0.705,-1.565,-1.185,-0.285,0.665,-1.565,-1.175,-0.265,0.625,-1.565,-1.165,-0.245,0.585,-1.565,-1.165,-0.225,0.545,-1.565,-1.155,-0.195,0.495,-1.555,-1.145,-0.165,0.455,-1.555,-1.145,-0.135,0.405,-1.555,-1.135,-0.0849999999999999,0.345,-1.555,-1.125,-0.035,0.275,-1.545,-1.115,0.0850000000000001,0.145,-1.545,-1.115,-1.545,-1.105,-1.545,-1.095,-1.535,-1.085,-1.535,-1.085,-1.535,-1.075,-1.525,-1.065,-1.525,-1.055,-1.525,-1.045,-1.515,-1.045,-1.515,-1.035,-1.505,-1.025,-1.505,-1.015,-1.495,-1.005,-1.495)

# Load the packages this snippet needs so it runs in a fresh R session:
# tibble provides tibble(), ggplot2 provides ggplot()/aes()/geom_point().
library(tibble)
library(ggplot2)

# One row per point, built from the `x` and `y` vectors defined above.
example_df <- tibble(x = x, y = y)

# Plain scatter plot of the raw points (no connecting lines yet).
ggplot(example_df, aes(x = x, y = y)) +
  geom_point()

enter image description here

geom_line的默认行为是根据坐标在数据框中的显示顺序连接。有没有一种简单的方法可以根据点之间的欧几里得距离来连接点?

3 个答案:

答案 0 :(得分:3)

这是您所提出问题的一种解决方案,尽管我怀疑这并不是您真正想要的,但这可能会有所帮助...

# Connect every point to its nearest neighbour by Euclidean distance.
#
# Distances are computed from the x/y coordinate columns only. Using the whole
# data frame would also feed any extra columns (e.g. xend/yend created below on
# a previous run, or a grouping factor) into dist() and corrupt the result.
distmat <- as.matrix(dist(example_df[, c("x", "y")]))  # pairwise distances between rows
diag(distmat) <- Inf                      # a point must not be chosen as its own neighbour
nearest <- apply(distmat, 1, which.min)   # per row: index of the closest other point
example_df$xend <- example_df$x[nearest]  # segment end point = coordinates of that neighbour
example_df$yend <- example_df$y[nearest]

# Draw the points and one segment from each point to its nearest neighbour.
ggplot(example_df, aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_point() +
  geom_segment(colour = "blue")

enter image description here

答案 1 :(得分:2)

另一个答案——适用于此数据,但一般情况下并不适用

# Split the points into three bands of y; the cut points (-1.4 and -0.8)
# were determined 'by eye' from the scatter plot.
example_df[["group"]] <- cut(
  example_df[["y"]],
  breaks = c(-Inf, -1.4, -0.8, Inf)
)

# Sort rows by y: geom_path() joins points in row order within each group.
example_df <- example_df[order(example_df[["y"]]), ]

ggplot(data = example_df, mapping = aes(x = x, y = y, group = group)) +
  geom_point() +
  geom_path(colour = "blue")

enter image description here

答案 2 :(得分:1)

这个答案与 Andrew Gustar 基于 cut 的答案的不同之处在于如何把点分成 3 条路径。我希望这一过程更具可扩展性,因此尝试使用层次聚类,根据点与点之间的距离将它们分为 3 个簇。在这个例子中,这些簇很容易分开;换成其他数据可能会更棘手,您可能需要使用不同的聚类算法。然后,与另一个答案(+1)一样,在每个簇内按 y 值排序,以便按正确的顺序绘制路径。

library(dplyr)
library(ggplot2)

# Start again from the raw coordinates, one row per point.
example_df <- tibble(x = x, y = y)

# Single-linkage hierarchical clustering on the pairwise Euclidean distances.
clust <- hclust(d = dist(example_df), method = "single")

# Label each point with one of three clusters, then order the rows by cluster
# and by y within each cluster so the path is drawn in that order.
df_clustered <- example_df %>%
  mutate(
    cluster = as.factor(cutree(clust, k = 3))
  ) %>%
  arrange(cluster, y)

# One coloured path per cluster, drawn over the points.
ggplot(data = df_clustered, mapping = aes(x = x, y = y, colour = cluster)) +
  geom_point() +
  geom_path()