计算两个数据集(最近邻居)的两个点之间的距离

时间:2014-03-01 23:21:19

标签: r distance spatial

我想计算两个不同数据集中两点之间的距离。我不想计算所有点之间的距离 - 只是到数据集B的最近点 一些例子:

数据集A - 人员
http://pastebin.com/HbaeqACi

数据集B - Waterfeatures:
http://pastebin.com/UdDvNtHs

数据集C - 城市:
http://pastebin.com/nATnkMRk

所以...我想计算每个人到最近的水上特征点的距离 我已经尝试使用 rgeos 包并且在遇到一些预测错误后,我已经让它工作了。但是,这计算(至少我假设)到每个点的所有距离,但是,正如已经说过的,我只对距离最近的水特征点的距离感兴趣。

# load csv files
persons = read.csv("persons.csv", header = TRUE)
water = read.csv("water.csv", header = TRUE)
# change dataframes to SpatialPointDataFrame and assign a projection
library(sp)
library(rgeos)
coordinates(persons) <- c("POINT_X", "POINT_Y")
proj4string(persons) <- CRS("+proj=utm +datum=WGS84")
coordinates(water) <- c("POINT_X", "POINT_Y")
proj4string(water) <- CRS("+proj=utm +datum=WGS84")

# use rgoes package to calculate the distance
distance <- gDistance(persons, water, byid=TRUE)
# works, but calculates a huge number of distances

有没有参考,我错过了。或者我需要使用其他包或功能吗?我还查看了spatstat,它能够计算到最近邻居的距离,但不能计算两个不同数据集的距离:http://hosho.ees.hokudai.ac.jp/~kubo/Rdoc/library/spatstat/html/nndist.html


编辑:
完整的R-Script包括绘制数据集:

library(RgoogleMaps)
library(ggplot2)
library(ggmap)
library(sp)
library(fossil)

#load data
persons = read.csv("person.csv", header = TRUE, stringsAsFactors=FALSE)
water = read.csv("water.csv", header =TRUE, stringsAsFactors=FALSE)
city = read.csv("city.csv", header =TRUE)

# plot data
persons_ggplot2 <- persons
city_ggplot2 <- city
water_ggplot2 <- water
gc <- geocode('new york, usa')
center <- as.numeric(gc)  
G <- ggmap(get_googlemap(center = center, color = 'bw', scale = 1, zoom = 11, maptype = "terrain", frame=T), extent="device")
G1 <- G + geom_point(aes(x=POINT_X, y=POINT_Y ),data=city, shape = 22, color="black", fill = "yellow", size = 4) + geom_point(aes(x=POINT_X, y=POINT_Y ),data=persons, shape = 8, color="red", size=2.5) + geom_point(aes(x=POINT_X, y=POINT_Y ),data=water_ggplot2, color="blue", size=1)
plot(G1)

#### calculate distance
# Generate unique coordinates dataframe
UniqueCoordinates <- data.frame(unique(persons[,4:5]))
UniqueCoordinates$Id <- formatC((1:nrow(UniqueCoordinates)), width=3,flag=0)

# Generate a function that looks for the closest waterfeature for each id coordinates
NearestW <- function(id){
tmp <- UniqueCoordinates[UniqueCoordinates$Id==id, 1:2]
WaterFeatures <- rbind(tmp,water[,2:3])
tmp1 <- earth.dist(WaterFeatures, dist=TRUE)[1:(nrow(WaterFeatures)-1)]
tmp1 <- which.min(tmp1)
tmp1 <- water[tmp1,1]
tmp1 <- data.frame(tmp1, WaterFeature=tmp)
return(tmp1)
}

#apply to each id and the merge
CoordinatesWaterFeature <- ldply(UniqueCoordinates$Id, NearestW)
persons <- merge(persons, CoordinatesWaterFeature, by.x=c(4,5), by.y=c(2,3))

enter image description here

2 个答案:

答案 0 :(得分:5)

如何为每个人编写寻找最近水上特征的函数?

#requires function earth.dist from "fossil" package
require(fossil)

#load data
persons = read.csv("person.csv", header = TRUE, stringsAsFactors=FALSE)
water = read.csv("water.csv", header =TRUE, stringsAsFactors=FALSE)

#Generate unique coordinates dataframe
UniqueCoordinates <- data.frame(unique(persons[,4:5]))
UniqueCoordinates$Id <- formatC((1:nrow(UniqueCoordinates)), width=3,flag=0)


#Generate a function that looks for the closest waterfeature for each id coordinates
NearestW <- function(id){
   tmp <- UniqueCoordinates[UniqueCoordinates$Id==id, 1:2]
   WaterFeatures <- rbind(tmp,water[,2:3])
   tmp1 <- earth.dist(WaterFeatures, dist=TRUE)[1:(nrow(WaterFeatures)-1)]
   tmp1 <- min(tmp1)
   tmp1 <- data.frame(tmp1, WaterFeature=tmp)
   return(tmp1)
 }

#apply to each id and the merge
CoordinatesWaterFeature <- ldply(UniqueCoordinates$Id, NearestW)
persons <- merge(persons, CoordinatesWaterFeature, by.x=c(4,5), by.y=c(2,3))

注意:我在原始的read.csv中添加了一个stringsAsFactors参数,它最终使合并更容易

注意:列tmp1记录最近水景的METERS数量

答案 1 :(得分:3)

也许我有点太晚了,但你可以使用spatstat来计算两个不同数据集之间的距离。命令为nncross。您必须使用的参数是两个ppp类型的对象,您可以使用as.ppp()函数创建它。