我正在使用纬度和经度数据在 leaflet 地图上绘制线条(请参见下文)。理想情况下,这些线条将存储在数据框的 lat 和 lng 列中。在 lat 列中,每个起点 lat 值之后紧跟一个终点 lat 值,然后是另一条线的起点 lat 值(通过 line_id 列可以区分每条线)。lng 数据的排列方式类似。理想情况下,数据框应如下所示:
> df.better
line_id lat lng
1 ABC 51.50995 -0.1345093
2 ABC 51.51074 -0.1345093
3 XYZ 51.50991 -0.1345193
4 XYZ 51.51079 -0.1351200
问题在于它以这种格式从数据存储中出来:
> df.wide
line_id start_lat end_lat start_lng end_lng
1 ABC 51.50995 51.51074 -0.1345093 -0.13519
2 XYZ 51.50991 51.51079 0.1351900 0.13512
这看起来有点像经典的“宽转长”数据整理(data wrangling)问题,对此已经有很多问答,但是标准的“长”格式会把纬度和经度数据折叠为一列,而我需要两列。我尝试了以下 tidyverse 解决方案:
# Stack all four coordinate columns into name/value pairs.
df2 <- pivot_longer(
  df.wide,
  cols = start_lat:end_lng,
  names_to = "variable",
  values_to = "value"
)
然后我清理 variable 列:
# Drop the start_/end_ prefix so that only "lat" or "lng" remains.
df2[["variable"]] <- gsub(".*_lat", "lat", df2[["variable"]])
df2[["variable"]] <- gsub(".*_lng", "lng", df2[["variable"]])
这是结果,至少看起来数据顺序正确:
> df2
A tibble: 8 x 3
line_id variable value
<fct> <chr> <dbl>
1 ABC lat 51.50995
2 ABC lat 51.51074
3 ABC lng -0.1345093
4 ABC lng -0.13519
5 XYZ lat 51.50991
6 XYZ lat 51.51079
7 XYZ lng 0.13519
8 XYZ lng 0.135120
最后一步似乎需要再次把数据展开(变宽),但是使用 pivot_wider 会得到“值无法被唯一标识”的警告:
# Spread lat/lng back into columns; each line_id has two values per name.
pivot_wider(df2, names_from = variable, values_from = value)
# A tibble: 2 x 3
line_id lat lng
<fct> <list<dbl>> <list<dbl>>
1 ABC [2] [2]
2 XYZ [2] [2]
Warning message:
Values in `value` are not uniquely identified; output will contain list-cols.
我(认为我)可以理解为什么会发生这个错误,但是在 variable 列中提供唯一标识符只会让我回到起点。我可以/应该如何处理?
# Reproducible example for the question: builds the desired and the actual
# data layouts, reproduces the pivot_wider() warning, and draws the desired
# lines on a leaflet map.

# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later on an unrelated line.
library(magrittr)
library(tidyr)
library(dplyr)
# Needed for leaflet(), setView(), addTiles() and addPolylines() below.
library(leaflet)

# Show 7 significant digits when tibbles are printed.
options(pillar.sigfig = 7)

# Desired layout: one row per point, two consecutive rows per line_id.
df.better <- data.frame(
  line_id = c("ABC", "ABC", "XYZ", "XYZ"),
  lat = c(51.509950, 51.510736, 51.509910, 51.510786),
  lng = c(-0.1345093, -0.1345093, -0.1345193, -0.135120)
)

# Actual layout: one row per line, start/end coordinates in separate columns.
# NOTE(review): the longitudes here do not all match df.better (e.g. the XYZ
# values are positive); they are kept exactly as posted.
df.wide <- data.frame(
  line_id = c("ABC", "XYZ"),
  start_lat = c(51.509950, 51.509910),
  end_lat = c(51.510736, 51.510786),
  start_lng = c(-0.1345093, 0.135190),
  end_lng = c(-0.135190, 0.135120)
)

# Attempt: stack all four coordinate columns into name/value pairs ...
df2 <- df.wide %>%
  pivot_longer(
    cols = start_lat:end_lng,
    names_to = "variable",
    values_to = "value"
  )

# ... strip the start_/end_ prefix so only "lat" / "lng" remains ...
df2$variable <- gsub(".*_lat", "lat", df2$variable)
df2$variable <- gsub(".*_lng", "lng", df2$variable)

# ... and widen again. Each line_id now has two values per name, so this
# yields list-columns plus the "not uniquely identified" warning.
df2 %>% pivot_wider(names_from = variable, values_from = value)

# Draw the desired result: one polyline per line_id.
m <- leaflet() %>%
  setView(lng = -0.1345093, lat = 51.510090, zoom = 18) %>%
  addTiles()
for (i in unique(df.better$line_id)) { # HT: https://stackoverflow.com/a/44547502/952708
  m <- m %>%
    addPolylines(
      data = df.better[df.better$line_id == i, ],
      lng = ~lng, lat = ~lat, color = "Green",
      opacity = 0.5, weight = 2, dashArray = 5
    )
}
m
答案 0 :(得分:1)
如果我理解正确,那么您正在寻找这样的东西:
# Wide sample data from the question: one row per line.
df.wide <- data.frame(
  line_id = c("ABC", "XYZ"),
  start_lat = c(51.509950, 51.509910),
  end_lat = c(51.510736, 51.510786),
  start_lng = c(-0.1345093, 0.135190),
  end_lng = c(-0.135190, 0.135120)
)

# Split every column name at the underscore: the first part ("start"/"end")
# goes into the new `set` column, and the special ".value" entry makes the
# second part ("lat"/"lng") the name of an output column.
pivot_longer(
  df.wide,
  cols = -line_id,
  names_to = c("set", ".value"),
  names_pattern = "(.+)_(.+)"
)
#  line_id set        lat        lng
#  <fct>   <chr>    <dbl>      <dbl>
#1 ABC     start 51.50995 -0.1345093
#2 ABC     end   51.51074 -0.13519
#3 XYZ     start 51.50991  0.13519
#4 XYZ     end   51.51079  0.135120
答案 1 :(得分:1)
可能会成功的方法
library(data.table)
library(sf)
library(dplyr)
library(leaflet)

# Wide sample data, read from inline text.
dt <- data.table::fread("line_id start_lat end_lat start_lng end_lng
ABC 51.50995 51.51074 -0.1345093 -0.13519
XYZ 51.50991 51.51079 0.1351900 0.13512")

# Melt the *_lng and *_lat columns into two value columns in one step;
# point_id numbers the matched columns in order (see the output below).
dt.melt <- melt(
  dt,
  id.vars = "line_id",
  measure.vars = patterns(lon = "_lng$", lat = "_lat$"),
  variable.name = "point_id"
)
#    line_id point_id        lon      lat
# 1:     ABC        1 -0.1345093 51.50995
# 2:     XYZ        1  0.1351900 51.50991
# 3:     ABC        2 -0.1351900 51.51074
# 4:     XYZ        2  0.1351200 51.51079

# Turn each row into a point geometry (EPSG:4326).
dt.points <- st_as_sf(dt.melt, coords = c("lon", "lat"), crs = 4326)

# Combine the points of each line_id and cast the result to a LINESTRING.
dt.lines <- dt.points %>%
  group_by(line_id) %>%
  summarise(geometry = st_combine(geometry)) %>%
  st_cast("LINESTRING")

# Draw every line on the map, with its line_id as the popup.
leaflet() %>%
  addTiles() %>%
  addPolylines(data = dt.lines, popup = ~line_id)