我有一份约2500个地点的列表,正在尝试对其进行地理编码,其中大约97个无法成功编码。不过,此前已有人手工对这些地点做过地理编码。我已将旧的手工地理编码表与最新的地理编码结果合并(join)在一起,现在想用手工地理编码的记录替换缺失或错误的地理编码。数据如下所示:
NewLat OldLat Flag
29.019 39.213 1
41.23 41.23 0
NA 38.13 1
0.00 41.29 1
我想要这样做,以便进行以下操作:
如果NewLat为NA或0.00,则将值替换为OldLat。 如果记录的标志为1,则替换为OldLat。
希望的结果是
NewLat OldLat Flag
39.213 39.213 1
41.23 41.23 0
38.13 38.13 1
41.29 41.29 1
到目前为止我有
# Fill only the missing NewLat values with OldLat from the SAME rows.
# The right-hand side must be subset with the same mask: assigning the whole
# OldLat column would take values from the top of the column (with a
# replacement-length warning) instead of from the matching rows.
df$NewLat[is.na(df$NewLat)] <- df$OldLat[is.na(df$NewLat)]
但是第二部分让我很沮丧。我尝试过
if("1"%in%df$Flag){df$NewLat=df$OldLat}
和elseif (df$Flag =1) {df$NewLat=df$OldLat}
和mutate(df, df$NewLat = ifelse(df$NewLat<1.0,df$OldLat,df$NewLat))
但似乎没有任何作用。
有什么建议吗?
编辑:收到帮助后,我可以正常运行,只是经度仍然保留原始记录不变。这是代码
# Set the working directory so the relative file names used below resolve
setwd("C:/Users/bwhite/Desktop/Geocode")
# Read the newest CDOE data that was geocoded in MapMarker (2521 records);
# empty strings and the literal "NA" are both read as missing values
MM <- read.csv(
  "CDOE_Schools_021919_GEOCODED.csv",
  stringsAsFactors = FALSE,
  na.strings = c("", "NA")
)
# Count the rows that are missing Out_County (97 this time around)
sum(is.na(MM$Out_County))
# Count the rows that have a "0" for lat and long; should match the Out_County
# count. na.rm = TRUE is needed because a comparison against a missing
# coordinate is NA, and a single NA would make the whole sum NA.
sum(MM$NewLat < 1.000, na.rm = TRUE)
sum(is.na(MM$NewLat))
sum(MM$NewLong < 1.000 & MM$NewLong > -99.00, na.rm = TRUE)
sum(is.na(MM$NewLong))
# Count the bad-geocode flags, ignoring NAs (there are 150)
sum(MM$Bad_Geo, na.rm = TRUE)
# Build the join key for MM: school name and physical address, space-separated
MM[["Key"]] <- paste(MM$SCHOOL_NAME, MM$PHYSICAL_ADDRESS, sep = " ")
# Load the previous CDOE OpenData CSV (2481 records)
OD <- read.csv(
  "CDPHE_CDOE_School_Locations_and_District_Office_Locations.csv",
  stringsAsFactors = FALSE
)
# Build the same kind of key for OD from its name and address columns
OD[["Key"]] <- paste(OD$School_Name, OD$Address, sep = " ")
# Inner join on Key, only to see how many records match (2189 records match)
InnerTest <- merge(
  x = MM,
  y = OD[, c("COUNTY", "OldLong", "OldLat", "Key")],
  by = "Key"
)
# Left outer join on Key: keep every MM row and attach the OD
# county / longitude / latitude columns where a match exists
Joined <- merge(
  x = MM,
  y = OD[, c("COUNTY", "OldLong", "OldLat", "Key")],
  by = "Key",
  all.x = TRUE
)
################################################
############# COUNTY CALCULATIONS ##############
################################################
# County comes from the MM Out_County field when it is present; otherwise fall
# back to the COUNTY field that was joined in from the OD table
Joined$County <- ifelse(
  is.na(Joined$Out_County),
  Joined$COUNTY,
  Joined$Out_County
)
# Count the schools that still have no County after both sources were used
# (18 new schools remain to be geocoded by hand)
sum(is.na(Joined$County))
################################################
############# Lat/LONG CALCULATIONS ##############
################################################
library(dplyr)
# A coordinate is replaced by the previous OpenData value when ANY of these
# holds: the row is flagged as a bad geocode, the new value is a zero
# placeholder, or the new value is NA.
#  * The three tests are joined with `|` only. `&` binds tighter than `|` in R,
#    so mixing them without parentheses silently changes the rule.
#  * `Bad_Geo %in% 1` is used instead of `Bad_Geo == 1` because Bad_Geo is NA
#    for unflagged rows; `== 1` would give an NA test and ifelse() would then
#    blank out perfectly good coordinates.
#  * The result must be assigned back; mutate() does not modify Joined in place.
# Rows with no OpenData match get NA here, so they show up in the missing
# counts below instead of keeping a known-bad or zero coordinate.
Joined <- Joined %>%
  mutate(
    NewLat = ifelse(
      is.na(NewLat) | NewLat < 1.0 | Bad_Geo %in% 1,
      OldLat,
      NewLat
    ),
    NewLong = ifelse(
      is.na(NewLong) | NewLong == 0 | Bad_Geo %in% 1,
      OldLong,
      NewLong
    )
  )
# Count the latitudes that are still missing or zero in the cleaned table
sum(is.na(Joined$NewLat))
sum(Joined$NewLat == 0, na.rm = TRUE)
# Count the longitudes that are still missing or zero in the cleaned table
sum(is.na(Joined$NewLong))
sum(Joined$NewLong == 0, na.rm = TRUE)
# Drop the helper join key; it is not part of the deliverable
Joined$Key <- NULL
# Export the cleaned table as a CSV file that Excel can open.
# (The interactive `?write.csv` help lookup was removed: it only opens the
# help page and has no place in a script that is run top to bottom.)
write.csv(Joined, file = "CDOE_Schools_GEOCODED_CLEANED.csv")
这是dput(head(MM,15))
structure(list(SCHOOL_CODE = c(2572L, 5828L, 5972L, 7296L, 8762L,
10L, 11L, 12L, 14L, 15L, 16L, 17L, 18L, 19L, 20L), SCHOOL_NAME = c("LEGACY ACADEMY",
"MESA VALLEY COMMUNITY SCHOOL", "MOLHOLM ELEMENTARY SCHOOL",
"RED SANDSTONE ELEMENTARY SCHOOL", "CHRISTIAN COMMUNITY SCHOOLS",
"ABRAHAM LINCOLN HIGH SCHOOL", "ACADEMY CHARTER SCHOOL", "ACRES GREEN ELEMENTARY SCHOOL",
"GLACIER PEAK ELEMENTARY SCHOOL", "ACADEMY OF CHARTER SCHOOLS",
"FOX HOLLOW ELEMENTARY SCHOOL", "ACADEMY ENDEAVOUR ELEMENTARY SCHOOL",
"LIBERTY MIDDLE SCHOOL", "ACADEMY INTERNATIONAL ELEMENTARY SCHOOL",
"ADAMS CITY MIDDLE SCHOOL"), PHYSICAL_ADDRESS = c("1975 LEGACY CIRCLE",
"2387 PATTERSON RD", "6000 WEST 9TH AVENUE", "551 NORTH FRONTAGE ROAD",
"3099 F ROAD", "2285 SOUTH FEDERAL BOULEVARD", "1551 PRAIRIE HAWK DRIVE",
"13524 NORTH ACRES GREEN DRIVE", "12060 JASMINE STREET", "11800 LOWELL BLVD",
"6363 SOUTH WACO STREET", "3475 HAMPTON PARK DRIVE", "21500 EAST DRY CREEK ROAD",
"8550 CHARITY DRIVE", "4451 EAST 72ND AVENUE"), PHYSICAL_CITY = c("ELIZABETH",
"GRAND JUNCTION", "LAKEWOOD", "VAIL", "GRAND JUNCTION", "DENVER",
"CASTLE ROCK", "LITTLETON", "BRIGHTON", "WESTMINSTER", "AURORA",
"COLORADO SPRINGS", "AURORA", "COLORADO SPRINGS", "COMMERCE CITY"
), PHISICAL_STATE = c("CO", "CO", "CO", "CO", "CO", "CO", "CO",
"CO", "CO", "CO", "CO", "CO", "CO", "CO", "CO"), PHYSICAL_ZIPCODE = c(80107L,
81505L, 80214L, 81657L, 81504L, 80219L, 80104L, 80124L, 80605L,
80031L, 80116L, 80920L, 80016L, 80920L, 80022L), PHYSICAL_ZIPCODE_4 = c(8330L,
1219L, 2301L, 4062L, NA, 5433L, 7900L, 2701L, 4625L, 5097L, 1098L,
4611L, 2086L, 7360L, 1405L), PHONE = c(3036462636, 9702547202,
3039826207, 9703282910, 9704344619, 7204235000, 3036604881, 3033877125,
7209725940, 3032898088, 7208868700, 7192345600, 7208862400, 7192344000,
3032895881), LOWEST_GRADE = c("Kindergarten", "Kindergarten",
"Preschool", "Preschool", "Preschool", "9th Grade", "Preschool",
"Preschool", "Kindergarten", "Preschool", "Preschool", "Kindergarten",
"6th Grade", "Preschool", "6th Grade"), HIGHEST_GRADE = c("8th Grade",
"12th Grade", "6th Grade", "5th Grade", "Preschool", "12th Grade",
"8th Grade", "6th Grade", "5th Grade", "12th Grade", "5th Grade",
"5th Grade", "8th Grade", "5th Grade", "8th Grade"), ORGANIZATION_CODE = c(920L,
2000L, 1420L, 910L, 2000L, 880L, 900L, 900L, 20L, 8001L, 130L,
1040L, 130L, 1040L, 30L), District_Name = c("ELIZABETH SCHOOL DISTRICT",
"MESA COUNTY VALLEY 51", "JEFFERSON COUNTY R-1", "EAGLE COUNTY RE 50",
"MESA COUNTY VALLEY 51", "DENVER COUNTY 1", "DOUGLAS COUNTY RE 1",
"DOUGLAS COUNTY RE 1", "ADAMS 12 FIVE STAR SCHOOLS", "CHARTER SCHOOL INSTITUTE",
"CHERRY CREEK 5", "ACADEMY 20", "CHERRY CREEK 5", "ACADEMY 20",
"ADAMS COUNTY 14"), District_Setting = c("Remote", "Urban-Suburban",
"Denver Metro", "Outlying Town", NA, "Denver Metro", "Denver Metro",
"Denver Metro", "Denver Metro", "Urban-Suburban", "Denver Metro",
"Urban-Suburban", "Denver Metro", "Urban-Suburban", "Denver Metro"
), CHARTER = c("Y", "Y", "N", "N", NA, "N", "Y", "N", "N", "Y",
"N", "N", "N", "N", "N"), Type = c("Public School Physical Address ",
"Public School Physical Address ", "Public School Physical Address ",
"Public School Physical Address ", "Non-Public School Mailing Address ",
"Public School Physical Address ", "Public School Physical Address ",
"Public School Physical Address ", "Public School Physical Address ",
"Public School Physical Address ", "Public School Physical Address ",
"Public School Physical Address ", "Public School Physical Address ",
"Public School Physical Address ", "Public School Physical Address "
), County = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), Out_County = c("ELBERT", "MESA", "JEFFERSON", "EAGLE",
"MESA", "DENVER", "DOUGLAS", "DOUGLAS", "ADAMS", "ADAMS", "ARAPAHOE",
"EL PASO", "ARAPAHOE", "EL PASO", "ADAMS"), Organization_Size = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_),
MatchCode = c("S80", "S80", "S80", "S80", "S80", "S80", "S90",
"S82", "S90", "S80", "S90", "S80", "S80", "S80", "S80"),
LocationCode = c("AI0", "AI0", "AI0", "AI0", "AI0", "AS0",
"AS0", "AS0", "AS0", "AS0", "AS0", "AS0", "AS0", "AS0", "AS0"
), NewLong = c(-104.627296, -108.537918, -105.11515, -106.389023,
-108.47805, -105.025124, -104.87014, -104.896454, -104.917328,
-105.034142, -104.780891, -104.761169, -104.735603, -104.764404,
-104.935112), NewLat = c(39.359467, 39.09177, 39.731579,
39.645741, 39.091736, 39.676849, 39.384583, 39.557961, 39.915554,
39.911575, 39.601196, 38.952129, 39.579823, 38.961929, 39.827293
), Bad_Geo = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_), Key = c("LEGACY ACADEMY 1975 LEGACY CIRCLE",
"MESA VALLEY COMMUNITY SCHOOL 2387 PATTERSON RD", "MOLHOLM ELEMENTARY SCHOOL 6000 WEST 9TH AVENUE",
"RED SANDSTONE ELEMENTARY SCHOOL 551 NORTH FRONTAGE ROAD",
"CHRISTIAN COMMUNITY SCHOOLS 3099 F ROAD", "ABRAHAM LINCOLN HIGH SCHOOL 2285 SOUTH FEDERAL BOULEVARD",
"ACADEMY CHARTER SCHOOL 1551 PRAIRIE HAWK DRIVE", "ACRES GREEN ELEMENTARY SCHOOL 13524 NORTH ACRES GREEN DRIVE",
"GLACIER PEAK ELEMENTARY SCHOOL 12060 JASMINE STREET", "ACADEMY OF CHARTER SCHOOLS 11800 LOWELL BLVD",
"FOX HOLLOW ELEMENTARY SCHOOL 6363 SOUTH WACO STREET", "ACADEMY ENDEAVOUR ELEMENTARY SCHOOL 3475 HAMPTON PARK DRIVE",
"LIBERTY MIDDLE SCHOOL 21500 EAST DRY CREEK ROAD", "ACADEMY INTERNATIONAL ELEMENTARY SCHOOL 8550 CHARITY DRIVE",
"ADAMS CITY MIDDLE SCHOOL 4451 EAST 72ND AVENUE")), .Names = c("SCHOOL_CODE",
"SCHOOL_NAME", "PHYSICAL_ADDRESS", "PHYSICAL_CITY", "PHISICAL_STATE",
"PHYSICAL_ZIPCODE", "PHYSICAL_ZIPCODE_4", "PHONE", "LOWEST_GRADE",
"HIGHEST_GRADE", "ORGANIZATION_CODE", "District_Name", "District_Setting",
"CHARTER", "Type", "County", "Out_County", "Organization_Size",
"MatchCode", "LocationCode", "NewLong", "NewLat", "Bad_Geo",
"Key"), row.names = c(NA, 15L), class = "data.frame")
> dput(head(MM,15))
structure(list(SCHOOL_CODE = c(2572L, 5828L, 5972L, 7296L, 8762L,
10L, 11L, 12L, 14L, 15L, 16L, 17L, 18L, 19L, 20L), SCHOOL_NAME = c("LEGACY ACADEMY",
"MESA VALLEY COMMUNITY SCHOOL", "MOLHOLM ELEMENTARY SCHOOL",
"RED SANDSTONE ELEMENTARY SCHOOL", "CHRISTIAN COMMUNITY SCHOOLS",
"ABRAHAM LINCOLN HIGH SCHOOL", "ACADEMY CHARTER SCHOOL", "ACRES GREEN ELEMENTARY SCHOOL",
"GLACIER PEAK ELEMENTARY SCHOOL", "ACADEMY OF CHARTER SCHOOLS",
"FOX HOLLOW ELEMENTARY SCHOOL", "ACADEMY ENDEAVOUR ELEMENTARY SCHOOL",
"LIBERTY MIDDLE SCHOOL", "ACADEMY INTERNATIONAL ELEMENTARY SCHOOL",
"ADAMS CITY MIDDLE SCHOOL"), PHYSICAL_ADDRESS = c("1975 LEGACY CIRCLE",
"2387 PATTERSON RD", "6000 WEST 9TH AVENUE", "551 NORTH FRONTAGE ROAD",
"3099 F ROAD", "2285 SOUTH FEDERAL BOULEVARD", "1551 PRAIRIE HAWK DRIVE",
"13524 NORTH ACRES GREEN DRIVE", "12060 JASMINE STREET", "11800 LOWELL BLVD",
"6363 SOUTH WACO STREET", "3475 HAMPTON PARK DRIVE", "21500 EAST DRY CREEK ROAD",
"8550 CHARITY DRIVE", "4451 EAST 72ND AVENUE"), PHYSICAL_CITY = c("ELIZABETH",
"GRAND JUNCTION", "LAKEWOOD", "VAIL", "GRAND JUNCTION", "DENVER",
"CASTLE ROCK", "LITTLETON", "BRIGHTON", "WESTMINSTER", "AURORA",
"COLORADO SPRINGS", "AURORA", "COLORADO SPRINGS", "COMMERCE CITY"
), PHISICAL_STATE = c("CO", "CO", "CO", "CO", "CO", "CO", "CO",
"CO", "CO", "CO", "CO", "CO", "CO", "CO", "CO"), PHYSICAL_ZIPCODE = c(80107L,
81505L, 80214L, 81657L, 81504L, 80219L, 80104L, 80124L, 80605L,
80031L, 80116L, 80920L, 80016L, 80920L, 80022L), PHYSICAL_ZIPCODE_4 = c(8330L,
1219L, 2301L, 4062L, NA, 5433L, 7900L, 2701L, 4625L, 5097L, 1098L,
4611L, 2086L, 7360L, 1405L), PHONE = c(3036462636, 9702547202,
3039826207, 9703282910, 9704344619, 7204235000, 3036604881, 3033877125,
7209725940, 3032898088, 7208868700, 7192345600, 7208862400, 7192344000,
3032895881), LOWEST_GRADE = c("Kindergarten", "Kindergarten",
"Preschool", "Preschool", "Preschool", "9th Grade", "Preschool",
"Preschool", "Kindergarten", "Preschool", "Preschool", "Kindergarten",
"6th Grade", "Preschool", "6th Grade"), HIGHEST_GRADE = c("8th Grade",
"12th Grade", "6th Grade", "5th Grade", "Preschool", "12th Grade",
"8th Grade", "6th Grade", "5th Grade", "12th Grade", "5th Grade",
"5th Grade", "8th Grade", "5th Grade", "8th Grade"), ORGANIZATION_CODE = c(920L,
2000L, 1420L, 910L, 2000L, 880L, 900L, 900L, 20L, 8001L, 130L,
1040L, 130L, 1040L, 30L), District_Name = c("ELIZABETH SCHOOL DISTRICT",
"MESA COUNTY VALLEY 51", "JEFFERSON COUNTY R-1", "EAGLE COUNTY RE 50",
"MESA COUNTY VALLEY 51", "DENVER COUNTY 1", "DOUGLAS COUNTY RE 1",
"DOUGLAS COUNTY RE 1", "ADAMS 12 FIVE STAR SCHOOLS", "CHARTER SCHOOL INSTITUTE",
"CHERRY CREEK 5", "ACADEMY 20", "CHERRY CREEK 5", "ACADEMY 20",
"ADAMS COUNTY 14"), District_Setting = c("Remote", "Urban-Suburban",
"Denver Metro", "Outlying Town", NA, "Denver Metro", "Denver Metro",
"Denver Metro", "Denver Metro", "Urban-Suburban", "Denver Metro",
"Urban-Suburban", "Denver Metro", "Urban-Suburban", "Denver Metro"
), CHARTER = c("Y", "Y", "N", "N", NA, "N", "Y", "N", "N", "Y",
"N", "N", "N", "N", "N"), Type = c("Public School Physical Address ",
"Public School Physical Address ", "Public School Physical Address ",
"Public School Physical Address ", "Non-Public School Mailing Address ",
"Public School Physical Address ", "Public School Physical Address ",
"Public School Physical Address ", "Public School Physical Address ",
"Public School Physical Address ", "Public School Physical Address ",
"Public School Physical Address ", "Public School Physical Address ",
"Public School Physical Address ", "Public School Physical Address "
), County = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), Out_County = c("ELBERT", "MESA", "JEFFERSON", "EAGLE",
"MESA", "DENVER", "DOUGLAS", "DOUGLAS", "ADAMS", "ADAMS", "ARAPAHOE",
"EL PASO", "ARAPAHOE", "EL PASO", "ADAMS"), Organization_Size = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_),
MatchCode = c("S80", "S80", "S80", "S80", "S80", "S80", "S90",
"S82", "S90", "S80", "S90", "S80", "S80", "S80", "S80"),
LocationCode = c("AI0", "AI0", "AI0", "AI0", "AI0", "AS0",
"AS0", "AS0", "AS0", "AS0", "AS0", "AS0", "AS0", "AS0", "AS0"
), NewLong = c(-104.627296, -108.537918, -105.11515, -106.389023,
-108.47805, -105.025124, -104.87014, -104.896454, -104.917328,
-105.034142, -104.780891, -104.761169, -104.735603, -104.764404,
-104.935112), NewLat = c(39.359467, 39.09177, 39.731579,
39.645741, 39.091736, 39.676849, 39.384583, 39.557961, 39.915554,
39.911575, 39.601196, 38.952129, 39.579823, 38.961929, 39.827293
), Bad_Geo = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_), Key = c("LEGACY ACADEMY 1975 LEGACY CIRCLE",
"MESA VALLEY COMMUNITY SCHOOL 2387 PATTERSON RD", "MOLHOLM ELEMENTARY SCHOOL 6000 WEST 9TH AVENUE",
"RED SANDSTONE ELEMENTARY SCHOOL 551 NORTH FRONTAGE ROAD",
"CHRISTIAN COMMUNITY SCHOOLS 3099 F ROAD", "ABRAHAM LINCOLN HIGH SCHOOL 2285 SOUTH FEDERAL BOULEVARD",
"ACADEMY CHARTER SCHOOL 1551 PRAIRIE HAWK DRIVE", "ACRES GREEN ELEMENTARY SCHOOL 13524 NORTH ACRES GREEN DRIVE",
"GLACIER PEAK ELEMENTARY SCHOOL 12060 JASMINE STREET", "ACADEMY OF CHARTER SCHOOLS 11800 LOWELL BLVD",
"FOX HOLLOW ELEMENTARY SCHOOL 6363 SOUTH WACO STREET", "ACADEMY ENDEAVOUR ELEMENTARY SCHOOL 3475 HAMPTON PARK DRIVE",
"LIBERTY MIDDLE SCHOOL 21500 EAST DRY CREEK ROAD", "ACADEMY INTERNATIONAL ELEMENTARY SCHOOL 8550 CHARITY DRIVE",
"ADAMS CITY MIDDLE SCHOOL 4451 EAST 72ND AVENUE")), .Names = c("SCHOOL_CODE",
"SCHOOL_NAME", "PHYSICAL_ADDRESS", "PHYSICAL_CITY", "PHISICAL_STATE",
"PHYSICAL_ZIPCODE", "PHYSICAL_ZIPCODE_4", "PHONE", "LOWEST_GRADE",
"HIGHEST_GRADE", "ORGANIZATION_CODE", "District_Name", "District_Setting",
"CHARTER", "Type", "County", "Out_County", "Organization_Size",
"MatchCode", "LocationCode", "NewLong", "NewLat", "Bad_Geo",
"Key"), row.names = c(NA, 15L), class = "data.frame")
答案 0 :(得分:0)
编辑:在问题中补充了期望输出之后,我把最终答案移到了最前面:
# Use OldLat wherever NewLat is missing, is zero, or the row is flagged
# (Flag == 1); otherwise keep NewLat. The result is printed, not stored.
mutate(
  df,
  NewLat = ifelse(is.na(NewLat) | NewLat == 0 | Flag == 1, OldLat, NewLat)
)
NewLat OldLat Flag
1 39.213 39.213 1
2 41.230 41.230 0
3 38.130 38.130 1
4 41.290 41.290 0
原始回答:
希望我正确理解了您的逻辑,可以试试:
library(dplyr)
# First attempt. `&` binds tighter than `|` in R, so the rule reads:
# replace when NewLat is NA, OR when NewLat is zero AND the row is flagged.
# The parentheses only make that existing grouping visible.
df %>%
  mutate(
    NewLat = ifelse(
      is.na(NewLat) | (NewLat == 0 & Flag == 1),
      OldLat,
      NewLat
    )
  )
结果:
NewLat OldLat Flag
1 29.019 39.213 1
2 41.230 41.230 0
3 38.130 38.130 1
4 0.000 41.290 0
也许是这样吗?
# Alternative rule: any one of "missing", "zero" or "flagged" is enough to
# take OldLat instead of NewLat. The result is printed, not stored.
df %>%
  mutate(
    NewLat = ifelse(
      is.na(NewLat) | NewLat == 0 | Flag == 1,
      OldLat,
      NewLat
    )
  )
NewLat OldLat Flag
1 39.213 39.213 1
2 41.230 41.230 0
3 38.130 38.130 1
4 41.290 41.290 0
答案 1 :(得分:0)
您没有提供预期的输出,因此不确定这是否正确,但是我的理解是您希望替换三行。您还可以使用dplyr::if_else
代替base::ifelse
以获得更快的类型稳定的输出。
library(tidyverse)
# Rebuild the question's sample data from whitespace-separated literal text.
# read_table() is used because read_table2() is deprecated since readr 2.0.0.
tbl <- read_table(
"NewLat OldLat Flag
29.019 39.213 1
41.23 41.23 0
NA 38.13 1
0.00 41.29 0"
)
# Take OldLat when NewLat is missing, is zero, or the row is flagged.
# if_else() is the type-strict dplyr counterpart of base ifelse().
tbl %>%
  mutate(
    NewLat = if_else(
      is.na(NewLat) | NewLat == 0 | Flag == 1,
      OldLat,
      NewLat
    )
  )
#> # A tibble: 4 x 3
#> NewLat OldLat Flag
#> <dbl> <dbl> <dbl>
#> 1 39.2 39.2 1
#> 2 41.2 41.2 0
#> 3 38.1 38.1 1
#> 4 41.3 41.3 0
由reprex package(v0.2.1)于2019-02-20创建