I have this dataframe:
CC.Number Date Time Accident.Type Location.1
1 12T008826 07/01/2012 1630 PD (39.26699, -76.560642)
2 12L005385 07/02/2012 1229 PD (39.000549, -76.399312)
3 12L005388 07/02/2012 1229 PD (39.00058, -76.399267)
4 12T008851 07/02/2012 445 PI (39.26367, -76.56648)
5 12T008858 07/02/2012 802 PD (39.240862, -76.599017)
6 12T008860 07/02/2012 832 PD (39.27022, -76.63926)
我想将列 Location.1 拆分为 "alt" 和 "lng" 两列，如下所示：
CC.Number Date Time Accident.Type alt lng
1 12T008826 07/01/2012 1630 PD 39.26699 -76.560642
2 12L005385 07/02/2012 1229 PD 39.000549 -76.399312
3 12L005388 07/02/2012 1229 PD 39.00058 -76.399267
我试过
# Raw "(lat, lng)" strings taken from the data frame.
location <- md[["Location.1"]]
# Drop the first character of every string (meant to strip the opening
# parenthesis).
location1 <- substring(location, first = 2)
# Drop the last character of every string (meant to strip the closing
# parenthesis).
location2 <- substr(location1, start = 1, stop = nchar(location1) - 1)
# Split each remaining string at the comma; strsplit() returns a list with
# one character vector per input string.
location3 <- strsplit(location2, split = ",")
但卡在了如何将 location3 从列表转换为 dataframe 这一步
我试过
# Second attempt: wrap the cleaned strings in a one-column data frame, rename
# that column to "x", and split it into two columns with separate().
# NOTE(review): the data frame is assigned to `ocdf`, but the next line
# renames a column of `locdf` -- the two names do not match.
ocdf<-data.frame(location2)
colnames(locdf)[1] = c("x")
# NOTE(review): separate() is given `location` (the vector taken from
# md$Location.1) instead of a data frame; this looks like the cause of the
# "no applicable method ... class character" error -- confirm.
df <- separate(location, col=x,into = c("lat","log"), sep = ",")
但是我收到了错误
Error in UseMethod("separate_") : no applicable method for 'separate_' applied to an object of class "character"
答案 0 :(得分:2)
您也可以这样做：假设 dat1 是您的原始数据集名称，我们可以使用 strsplit 和 gsub。首先用 gsub 去掉逗号和括号，然后用 strsplit 按空格拆分值：
# Build a two-column data frame (Lat, Long) from the "(lat, lng)" strings:
# remove parentheses and commas, trim padding around each string, then split
# on runs of whitespace so leading or repeated spaces cannot produce extra
# empty pieces.
df1 <- setNames(
  data.frame(do.call(
    "rbind",
    strsplit(trimws(gsub("\\(|\\)|,", "", dat1$Location.1)), split = "\\s+")
  )),
  c("Lat", "Long")
)
# Drop Location.1 by name (not by assuming it is the last column) and append
# the new coordinate columns.
df2 <- data.frame(cbind(dat1[setdiff(names(dat1), "Location.1")], df1))
# CC.Number Date Time Accident.Type Lat Long
# 1 12T008826 07/01/12 1630 PD 39.26699 -76.560642
# 2 12L005385 07/02/12 1229 PD 39.000549 -76.399312
# 3 12L005388 07/02/12 1229 PD 39.00058 -76.399267
# 4 12T008851 07/02/12 445 PI 39.26367 -76.56648
# 5 12T008858 07/02/12 802 PD 39.240862 -76.599017
# 6 12T008860 07/02/12 832 PD 39.27022 -76.63926
答案 1 :(得分:2)
// NOTE(review): this fragment is not R -- it looks like a TypeScript/RxJS-style
// method and is presumably an extraction artifact unrelated to the tidyr
// answer around it; confirm against the original answer.
// Returns the result of Observable.of([false]).
handleError() {
return Observable.of([false]);
}
tidyr 中的 separate 也有效
答案 2 :(得分:1)
我们可以使用 tidyr 中的 extract 来捕获两个只包含数字和小数点的分组，并丢弃 "Location.1" 中的其余内容
library(tidyr)
# Capture two signed decimal numbers from "(lat, lng)". Either value may be
# negative and the whitespace after the comma is optional. The pattern stops
# at the end of the second number, so a missing closing parenthesis cannot
# steal its last digit. Text outside the two capture groups is discarded.
df1 %>%
  extract(
    Location.1,
    into = c("alt", "lng"),
    regex = "\\((-?[0-9.]+),\\s*(-?[0-9.]+)"
  )
# CC.Number Date Time Accident.Type alt lng
#1 12T008826 07/01/2012 1630 PD 39.26699 -76.560642
#2 12L005385 07/02/2012 1229 PD 39.000549 -76.399312
#3 12L005388 07/02/2012 1229 PD 39.00058 -76.399267
#4 12T008851 07/02/2012 445 PI 39.26367 -76.56648
#5 12T008858 07/02/2012 802 PD 39.240862 -76.599017
#6 12T008860 07/02/2012 832 PD 39.27022 -76.63926
答案 3 :(得分:1)
在 base R 中，您可以使用 trimws 删除 ( 和 )，并使用 read.table 按 , 进行拆分。
# Base-R approach: strip the surrounding spaces and parentheses from
# Location.1, parse the remaining "lat, lng" text as comma-separated values,
# and bind the two parsed columns to the first four columns of md.
cbind(
  md[1:4],
  read.table(
    text = trimws(md$Location.1, whitespace = "[ ()]"),
    sep = ",",
    col.names = c("alt", "lng")
  )
)
# CC.Number Date Time Accident.Type alt lng
#1 12T008826 07/01/2012 1630 PD 39.26699 -76.56064
#2 12L005385 07/02/2012 1229 PD 39.00055 -76.39931
#3 12L005388 07/02/2012 1229 PD 39.00058 -76.39927
#4 12T008851 07/02/2012 445 PI 39.26367 -76.56648
#5 12T008858 07/02/2012 802 PD 39.24086 -76.59902
#6 12T008860 07/02/2012 832 PD 39.27022 -76.63926
数据:
# Example data used by the answers: six accident records whose Location.1
# column stores the coordinates as a single "(lat, lng)" string. String values
# keep their leading space exactly as given.
md <- data.frame(
  CC.Number = c(
    "12T008826", "12L005385", "12L005388",
    "12T008851", "12T008858", "12T008860"
  ),
  Date = c(
    " 07/01/2012", " 07/02/2012", " 07/02/2012",
    " 07/02/2012", " 07/02/2012", " 07/02/2012"
  ),
  Time = c(1630L, 1229L, 1229L, 445L, 802L, 832L),
  Accident.Type = c(" PD", " PD", " PD", " PI", " PD", " PD"),
  Location.1 = c(
    " (39.26699, -76.560642)",
    " (39.000549, -76.399312)",
    " (39.00058, -76.399267)",
    " (39.26367, -76.56648)",
    " (39.240862, -76.599017)",
    " (39.27022, -76.63926)"
  ),
  stringsAsFactors = FALSE
)