我目前正在使用此R代码来尝试清除一些我拥有的.csv和.xlsx文件。
library(dplyr)
library(readxl)
crimefolder <- "FBI_2012"
geofolder <- "GeographyFiles_AllStates"
outputfolder <- "Crime_and_Geographies"
geo_types <- read.csv(paste0(geofolder, "/", "GeographyFile_Instructions.csv"), as.is = T)
geo_types$MatchName <- sub(" ", "", tolower(geo_types$Name))
geofiles <- list.files(path = geofolder)
crimefiles <- list.files(path = crimefolder)
cleandf <- function(df, year){
df <- df[which(df[, 1] == "Agency"):nrow(df), ]
colnames(df) <- df[1, ]
df <- df[-1, 1:which(is.na(df[1, ]))[1] - 1]
df <- df[1:which(is.na(df[, 2]))[1] - 1, ]
df$Jurisdiction <- trimws(sub("(Police Dept)|(Sheriff Department)", "", df$Agency))
df$Year <- year
return(df)
}
for (file in crimefiles){
df <- read.csv(paste0(crimefolder, "/", file), header = F, na.strings = "", as.is = T)
info <- strsplit(sub("[.].*", "", file), "_")[[1]]
statename <- info[1]
year <- as.numeric(info[2])
print(paste(statename, year))
df <- cleandf(df, year)
geo_type <- geo_types[which(geo_types$MatchName == tolower(trimws(statename))), "Type"]
geo_df <- read_excel(paste0(geofolder, "/",
geofiles[grep(paste0(statename, "_", geo_type), geofiles)[1]])) #THIS IS WHERE THE ERROR OCCURS
full_df <- left_join(df, geo_df, by = c("Jurisdiction" = "NAME"))
write.csv(full_df, paste0(outputfolder, "/", file), na = "")
}
大多数代码运行得很好。该错误仅发生在变量“ geo_df”上,我在所有大写字母处都进行了注释。该错误显示为:path
不存在:“ GeographyFiles_AllStates / NA”