通过共同的ID从一个城市到另一个城市的平均距离

时间:2020-08-20 06:49:54

标签: r geospatial distance

我对空间数据(spatial data)完全不了解。我一直在学习,但总觉得遗漏了什么。

我所拥有的:data.frame enterprises,其列为:id,parent_subsidiary,city_cod。

我需要的东西:对每个id,从母公司所在城市到各子公司所在城市的平均距离和最大距离。

例如:

    id         |     mean_dist     | max_dist
 1111          |         25km      |     50km    
 232           |        110km      |    180km  
 333           |          0km      |      0km  

我做了什么:

library("tidyverse")
library("sf")
# library("brazilmaps")   not working anymore
library("geobr")


# Split the enterprises table by role: rows with parent_subsidiary == 1 go to
# `parent`, rows with parent_subsidiary == 2 go to `subsidiary`.
parent <- filter(enterprises, parent_subsidiary == 1)
subsidiary <- filter(enterprises, parent_subsidiary == 2)

# Municipality polygons for every city (2019 boundaries), fetched via geobr.
m_city_br <- read_municipality(code_muni = "all", year = 2019)

# Alternative source for the same polygons: a local shapefile.
# shp_city <- st_read("/BR_Municipios_2019.shp")

# Attach the municipality polygon (column `geom`) to every company row by
# matching the company's city code against the municipality code.
map_parent <- left_join(
  parent,
  m_city_br,
  by = c("city_cod" = "code_muni")
)
map_subsidiary <- left_join(
  subsidiary,
  m_city_br,
  by = c("city_cod" = "code_muni")
)

# Spot check: distance in km between the first parent polygon and the second
# subsidiary polygon.
# Author's open question: this call is slow and its result differs from
# Google Maps -- is that expected?
units::set_units(
  st_distance(map_parent$geom[1], map_subsidiary$geom[2]),
  km
)


# To do by ID: one summary row per parent company ----
#
# Builds `distance_p_s` with the columns:
#   id         - company id taken from `parent$id`
#   subsidiary - number of subsidiary rows found for that id
#   mean_dist  - mean parent-to-subsidiary distance in km (2 decimals)
#   max_dist   - largest parent-to-subsidiary distance in km (2 decimals)
#
# Distances are measured between municipality centroids. st_distance() on the
# polygons themselves returns the smallest gap between the two shapes, so two
# municipalities that share a border are reported as 0 km apart.
#
# A parent without any subsidiary gets subsidiary = 0 and NA distances.

id_v <- as.vector(parent$id)

distance_rows <- lapply(id_v, function(current_id) {
  # which() drops NA comparisons so they cannot leak into the subsetting.
  parent_geom <- map_parent$geom[which(map_parent$id == current_id)][1]
  subsidiary_geom <- map_subsidiary$geom[which(map_subsidiary$id == current_id)]
  n_subsidiary <- length(subsidiary_geom)

  if (n_subsidiary == 0L) {
    return(data.frame(
      id = current_id,
      subsidiary = 0L,
      mean_dist = NA_real_,
      max_dist = NA_real_
    ))
  }

  # One call yields the distances to all subsidiaries of this parent at once.
  dist_km <- st_distance(
    st_centroid(parent_geom),
    st_centroid(subsidiary_geom)
  )
  dist_km <- round(as.numeric(units::set_units(dist_km, km)), 2)

  data.frame(
    id = current_id,
    subsidiary = n_subsidiary,
    mean_dist = round(mean(dist_km), 2),
    max_dist = max(dist_km)
  )
})

# The zero-row frame fixes the column layout when there is no parent at all;
# rbind() drops zero-row arguments as soon as real rows are present.
distance_p_s <- do.call(
  rbind,
  c(
    list(data.frame(
      id = parent$id[0],
      subsidiary = integer(),
      mean_dist = numeric(),
      max_dist = numeric()
    )),
    distance_rows
  )
)

rm(distance_rows)



对吗? 我可以计算城市的质心并计算距离吗?

我发现从code_muni == 4111407到code_muni == 4110102的距离是0,但它们是两个不同的城市(Imbituva,PR,巴西 - Ivaí,PR,巴西)。为什么?

数据示例 structure(list(id = c("1111", "1111", "1111", "1111", "232", "232", "232", "232", "3123", "3123", "4455", "4455", "686", "333", "333", "14112", "14112", "14112", "3633", "3633"), parent_subsidiary = c("1","2", "2", "2", "1", "2", "2", "2", "1", "2", "1", "2", "1", "2", "1", "1", "2", "2", "1", "2"), city_cod = c(4305801L,4202404L, 4314803L, 4314902L, 4318705L, 1303403L, 4304507L, 4314100L, 2408102L, 3144409L, 5208707L, 4205407L, 5210000L, 3203908L, 3518800L, 3118601L, 4217303L, 3118601L, 5003702L, 5205109L)), row.names = c(NA, 20L), class = "data.frame")

PS:这是巴西的城市 https://github.com/ipeaGIT/geobr/tree/master/r-package

2 个答案:

答案 0 :(得分:1)

好问题。我看了一会儿,然后又回过头来仔细想了想。这里没有计算平均值,只确定了每个母公司与其各子公司之间的距离。

数据已合并(join)——城市数据和数据框数据。然后对新的df执行mutate,以添加曲面上每个点的质心数据。

将df按ID拆分,并生成8 df的列表。每个df包含独立的母公司以及相关的子公司。 (1:4,1:3,1:4,1:2,....)

一个带有函数的循环遍历这8个df,并计算每个母公司到其每个子公司的距离。

我把列表中第一个df的距离与某网站给出的距离值做了对比。df1的距离与网站上的距离几乎相同。

输出显示在[link]

答案 1 :(得分:0)

我做了类似的事情:


# Per-parent distance summary ----
#
# Builds `distance_p_s` with one row per entry of `mparentid$id`:
#   id             - company id
#   qtd_subsidiary - number of subsidiary rows found for that id
#   dist_min       - smallest parent-to-subsidiary distance in km (2 decimals)
#   dist_media     - mean parent-to-subsidiary distance in km (2 decimals)
#   dist_max       - largest parent-to-subsidiary distance in km (2 decimals)
#
# Distances are measured between the centroids of the municipality
# geometries, transformed to EPSG:4674 before measuring.
# A parent without any subsidiary gets qtd_subsidiary = 0 and NA distances.

distance_p_s <- data.frame(
  id = character(),
  qtd_subsidiary = numeric(),
  dist_min = numeric(),
  dist_media = numeric(),
  dist_max = numeric()
)

id <- as.vector(mparentid$id)

summary_rows <- lapply(seq_along(id), function(i) {
  current_id <- id[i]
  print(paste0("Filtering id: ", current_id, " (", i, " of ", length(id), ")"))

  # `.env$` forces the lookup outside the table. A bare `id[i]` inside
  # filter() is resolved against the table's own `id` column, which picks the
  # wrong rows for the subsidiary table.
  parent_rows <- mparentid %>% filter(id == .env$current_id)
  subsidiary_rows <- msubsidiaryid %>% filter(id == .env$current_id)
  l <- nrow(subsidiary_rows)

  dist_km <- numeric(0)
  if (l > 0L) {
    # Work on the bare geometries so st_centroid() does not have to reason
    # about the attribute columns.
    parent_point <- st_transform(
      st_centroid(st_geometry(st_as_sf(parent_rows))[1]),
      4674
    )
    subsidiary_points <- st_transform(
      st_centroid(st_geometry(st_as_sf(subsidiary_rows))),
      4674
    )

    for (j in seq_len(l)) {
      print(paste0(
        "Tratando id: ", current_id, " (", i, " de ", length(id),
        "), subsidiary: ", j, " de ", l
      ))
    }

    # One call yields the distances to all subsidiaries of this parent.
    dist_km <- st_distance(parent_point, subsidiary_points)
    dist_km <- round(as.numeric(units::set_units(dist_km, km)), 2)
  }

  done <- data.frame(
    id = current_id,
    qtd_subsidiary = l,
    dist_min = if (l > 0L) min(dist_km) else NA_real_,
    dist_media = if (l > 0L) round(mean(dist_km), 2) else NA_real_,
    dist_max = if (l > 0L) max(dist_km) else NA_real_
  )

  print(paste0(
    "Concluido id: ", current_id, " (", i, " de ", length(id),
    "), subsidiary: ", l, " de ", l
  ))

  done
})

# rbind() drops the zero-row template once real rows exist, and returns the
# template unchanged when there are no parents at all.
distance_p_s <- do.call(rbind, c(list(distance_p_s), summary_rows))

rm(summary_rows)

这可能不是最好的方法,但是它暂时解决了我的问题。

相关问题