我的数据框中有一列,其中同时包含邮政编码(zipcode)、纬度和经度:
Location
"10007 (40.71363051943297, -74.00913138370635)"
"10002 (40.71612146793143, -73.98583147024613)"
"10012 (40.72553802086304, -73.99789641059084)"
"10009 (40.72664935898081, -73.97911148500697)"
我需要将它们分成三个不同的列,如Zipcode,Latitude和Longitude。
我试着这样做
# NOTE(review): the pattern below has only two capture groups, but three
# output column names are supplied; nothing in it captures the leading ZIP
# digits that precede the parenthesis.
extract(Location, c("Zip-Code","Latitude", "Longitude"), "\\(([^,]+), ([^)]+)\\)")
我想利用纬度和经度,通过 ggmap 绘制地图。
谢谢!
答案 0 :(得分:0)
# Example input: a single "ZIP (latitude, longitude)" string.
s.tmp <- "10007 (40.71363051943297, -74.00913138370635)"
对于ZIP:
# ZIP: keep the captured run of digits and drop the space and everything
# that follows it.
gsub(pattern = "([0-9]+) .*", replacement = "\\1", x = s.tmp)
对于纬度:
# Latitude: keep only the text captured between the opening parenthesis
# and the comma.
gsub(pattern = ".*\\((.*),.*", replacement = "\\1", x = s.tmp)
对于经度:
# Longitude: keep only the text captured between ", " and the closing
# parenthesis.
gsub(pattern = ".*, (.*)\\).*", replacement = "\\1", x = s.tmp)
答案 1 :(得分:0)
基本正则表达式提取:
library(purrr)
# Split "ZIP (latitude, longitude)" strings into a three-column tibble.
#
# stri_match_first_regex() returns one character matrix for the whole input
# vector (column 1 = full match, columns 2-4 = capture groups), so no
# per-element list needs to be row-bound. Column names are set explicitly
# before the conversion instead of relying on auto-generated V1..V4 names
# from the deprecated as_data_frame().
# All three resulting columns are character; convert latitude/longitude with
# as.numeric() before plotting.
c(
  "10007 (40.71363051943297, -74.00913138370635)",
  "10002 (40.71612146793143, -73.98583147024613)",
  "10012 (40.72553802086304, -73.99789641059084)",
  "10009 (40.72664935898081, -73.97911148500697)"
) %>%
  stringi::stri_match_first_regex("([[:digit:]]+)[[:space:]]+\\(([[:digit:]\\.\\-]+),[[:space:]]+([[:digit:]\\.\\-]+)\\)") %>%
  `colnames<-`(c("match", "zip", "latitude", "longitude")) %>%
  dplyr::as_tibble() %>%
  dplyr::select(zip, latitude, longitude)
## # A tibble: 4 × 3
## zip latitude longitude
## <chr> <chr> <chr>
## 1 10007 40.71363051943297 -74.00913138370635
## 2 10002 40.71612146793143 -73.98583147024613
## 3 10012 40.72553802086304 -73.99789641059084
## 4 10009 40.72664935898081 -73.97911148500697
更具可读性的写法:
library(purrr)
library(stringi)
library(dplyr)
library(purrr)
# Sample input: one "ZIP (latitude, longitude)" string per element.
dat <- c(
  "10007 (40.71363051943297, -74.00913138370635)",
  "10002 (40.71612146793143, -73.98583147024613)",
  "10012 (40.72553802086304, -73.99789641059084)",
  "10009 (40.72664935898081, -73.97911148500697)"
)

# Named regex fragments; `zip` and `num` each contribute one capture group.
zip <- "([[:digit:]]+)"
num <- "([[:digit:]\\.\\-]+)"
space <- "[[:space:]]+"
lp <- "\\("
rp <- "\\)"
comma <- ","

# Full pattern: ZIP, whitespace, "(", number, ",", whitespace, number, ")".
match_str <- paste0(zip, space, lp, num, comma, space, num, rp)

# stri_match_first_regex() yields a single character matrix (full match in
# column 1, capture groups in columns 2-4). Naming the columns up front
# replaces the deprecated as_data_frame() conversion and its implicit
# V1..V4 names.
dat %>%
  stri_match_first_regex(match_str) %>%
  `colnames<-`(c("match", "zip", "latitude", "longitude")) %>%
  as_tibble() %>%
  # Quoted names avoid any ambiguity with the `zip` regex variable above.
  select("zip", "latitude", "longitude")