我想找出数据框中存储的各个配置文件(每行一个)之间的最小距离。具体来说,我只关心其中某一行与数据框中其余各行之间的距离。
这是一个数据框:
# Example data frame literal (looks like dput() output): 25 named rows
# ("Mark_1" ... "Quer_2") by 20 numeric columns named "10" ... "1039".
# NOTE(review): the expression is not assigned to anything here, while the
# answer further down refers to the data as `df` -- presumably it is meant to
# be evaluated as `df <- structure(...)`; confirm before running.
structure(list(`10` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `34` = c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 393090, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6718400,
0, 311350, 0), `59` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2164949.7,
4834137.6, 0, 0, 0, 1187816.7, 0, 0, 0, 0, 0, 0, 1340912.5, 0
), `84` = c(0, 0, 0, 0, 0, 0, 0, 0, 8607100, 0, 0, 17586713.2,
22629743.6, 0, 0, 0, 2808791.7, 0, 0, 4026222.5, 0, 0, 0, 1981900,
0), `110` = c(2296000, 0, 0, 0, 0, 2140221.7, 0, 0, 5809230.6,
0, 0, 37134898.5, 3861828.7, 2553100, 0, 12075845.8, 0, 0, 1272950,
8695273, 0, 0, 2657180, 2710080, 0), `134` = c(0, 0, 0, 1176150,
0, 1329596.7, 1471000, 0, 6511934, 6511934, 0, 18709227.3, 0,
1041211.2, 0, 6544176.9, 0, 0, 2412651.7, 7724956.9, 2878418.3,
0, 8620131.7, 2386972.8, 0), `165` = c(0, 1226610, 0, 1345098.7,
2083771.9, 0, 1808231.4, 0, 0, 10742997.7, 0, 13060798.9, 0,
538340, 538340, 2791649.5, 0, 0, 6217622, 1316097.1, 4716931.8,
0, 6615816.9, 1510532, 0), `199` = c(0, 1571525, 0, 1903038.3,
1676700, 0, 888832.2, 0, 0, 9084418.6, 0, 11189460.1, 0, 0, 1807662.5,
2564275, 0, 0, 18080359.7, 0, 0, 0, 2397710.2, 1717949.2, 0),
`234` = c(0, 1314900, 2482696, 1325684, 0, 0, 0, 0, 0, 7321432.7,
0, 9843409.2, 0, 0, 1073341.7, 2762775, 0, 0, 9335312.8,
0, 0, 0, 1950788.2, 1509100, 0), `257` = c(0, 1568700, 14604298.7,
940162.2, 0, 0, 0, 0, 0, 4779505.9, 0, 9691692.4, 0, 0, 735290,
2650165, 0, 2311383.7, 5193383.4, 0, 0, 0, 1341998.7, 1225325.6,
0), `362` = c(0, 0, 4190740.5, 288800, 0, 0, 0, 0, 0, 4846634.8,
0, 9574498.7, 0, 0, 0, 1425600, 0, 8339312.1, 3877892.5,
0, 0, 0, 1752866.7, 0, 0), `433` = c(0, 0, 773280, 0, 0,
0, 0, 0, 0, 3926582.8, 3926582.8, 5962586.5, 0, 0, 0, 1041400,
0, 1972909.3, 1895439.4, 0, 0, 0, 963891.2, 0, 1109800),
`506` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9332272, 0, 0, 0,
0, 0, 0, 2219100, 0, 0, 0, 0, 0, 0, 0), `581` = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 4371537.1, 0, 0, 0, 0, 0, 0, 2428800,
0, 0, 0, 0, 0, 0, 0), `652` = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1689871.4, 0, 0, 0, 0, 0, 0, 988399.7, 0, 0, 0, 0, 0,
0, 0), `733` = c(0, 0, 0, 0, 0, 0, 0, 1250100, 0, 0, 1754205.3,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `818` = c(0, 0,
0, 0, 0, 0, 0, 517340, 0, 0, 1149227.6, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), `896` = c(0, 0, 0, 0, 0, 0, 0, 579846.7,
0, 0, 985931.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
`972` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 858255.5, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `1039` = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 848993.3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0)), .Names = c("10", "34", "59", "84", "110", "134",
"165", "199", "234", "257", "362", "433", "506", "581", "652",
"733", "818", "896", "972", "1039"), row.names = c("Mark_1",
"Mark_2", "Alex_1", "Katrin_1", "Georg_1", "Martin_1",
"Tim_1", "Tom_1", "Mike_1", "Mike_2", "Mike_3",
"Hare_1", "Dea_1", "Monty_1", "Monty_2", "Niko_1",
"Lee_1", "Marq_1", "Otto_1", "Priaq_1", "Surkta_1",
"Norsa_1", "Norsa_2", "Quer_1", "Quer_2"), class = "data.frame")
我感兴趣的是名为 Katrin_1 的行。我想找出与 Katrin_1 的欧氏距离最小的那些行,比如距离最近的 3-5 行。
答案 0 :(得分:4)
先用 df[!rownames(df) %in% "Katrin_1", ] 去掉 Katrin_1 这一行,再用 sweep 从剩下的每一行中减去 df["Katrin_1", ],然后对各元素取平方并用 rowSums 按行求和,得到平方欧氏距离,最后用 which.min 得到最终结果:
# Squared Euclidean distance from every other row of `df` to the row of
# interest. Ranking by the squared distance gives the same order as ranking by
# the distance itself, so sqrt() is skipped.
target <- "Katrin_1"
others <- df[!rownames(df) %in% target, ]
sq_dist <- rowSums(sweep(others, 2, as.numeric(df[target, ]), `-`)^2)

# Single nearest row.
names(which.min(sq_dist))
# [1] "Mark_2"

# The question asks for the 3-5 nearest rows rather than just one: sort the
# distances in ascending order and keep the first k names.
k <- 5
names(head(sort(sq_dist), k))
# [1] "Mark_2"  "Monty_2" "Tim_1"   "Georg_1" "Quer_2"
这应该比使用 dist 更高效,因为 dist 会计算所有行两两之间的距离,而我们只需要 Katrin_1 到其余各行的距离。