我有几个DBF文件需要做后续处理,但文件中的所有列都是字符类型,因此需要先把其中的数值列转换为数值类型。
我目前用来完成这一转换的代码如下:
library("shapefiles")
library("data.table")
library("reshape2")
library("stringr")
# Convert a factor whose levels are all numeric strings into a numeric vector.
#
# Args:
#   parFactor: a factor. Non-factor atomic vectors (e.g. character columns)
#              are also accepted; their distinct values are used as levels.
#
# Returns:
#   A numeric vector with one value per element of parFactor when every level
#   parses as a number; otherwise the logical sentinel FALSE, which callers
#   use to detect "not convertible".
as.numeric.factor <- function (parFactor)
{
  # A non-factor has NULL levels, which would make the "all numeric" check
  # pass vacuously and coerce arbitrary text to NA. Normalise to a factor so
  # the same level-based check applies to every input.
  if (!is.factor(parFactor))
  {
    parFactor <- factor(parFactor)
  }

  # Parse each distinct level exactly once; the coercion warning is expected
  # for non-numeric levels and is detected through the resulting NA instead.
  levelValues <- suppressWarnings(as.numeric(levels(parFactor)))
  if (anyNA(levelValues))
  {
    return (FALSE)
  }

  # The factor's integer codes index the parsed levels, so no second
  # as.numeric() pass over the data is needed; NA entries stay NA.
  levelValues[as.integer(parFactor)]
}
# Replace the named columns of a data frame with numeric versions wherever
# as.numeric.factor() can convert them.
#
# Args:
#   dataFrame:     data frame holding the columns to inspect.
#   colsToConvert: character vector of column names to try to convert.
#
# Returns:
#   The data frame with every convertible column replaced by its numeric
#   values and the matching entry of its "data_types" attribute set to "N".
#   Columns that cannot be converted are left untouched.
dbf.factorsToNumeric <- function (dataFrame, colsToConvert)
{
  for (colName in colsToConvert)
  {
    converted <- as.numeric.factor( dataFrame[,colName] )

    # as.numeric.factor() signals "not convertible" with a logical value.
    if (is.logical(converted))
    {
      next
    }

    # Record the new type at the column's position before swapping the data.
    position <- match (colName, names(dataFrame))
    attr(dataFrame, "data_types")[position] <- "N"
    dataFrame[,colName] <- converted
  }
  return(dataFrame)
}
# Driver: pick a DBF file, convert its numeric-looking columns and write the
# result next to the original under a new name.
filePath <- file.choose() # ask the user which file to process
survey <- read.dbf( filePath , header = TRUE) # load the file; data is in $dbf, metadata in $header
print(survey$header$num.records) # print number of records
colNames <- names(survey$dbf) # get all column names in dataset
survey$dbf <- dbf.factorsToNumeric (survey$dbf, colNames) # convert required columns into numeric

# Rewrite only the trailing ".dbf" extension (any letter case). The dot is
# escaped and the pattern anchored so that "dbf" appearing elsewhere in the
# path (e.g. a folder called "mydbf_files") is never touched.
outFilePath <- str_replace(filePath, regex("\\.dbf$", ignore_case = TRUE), "2.dbf")

# If the extension did not match, the output path equals the input path and
# writing would overwrite the original file, so stop instead.
if (identical(outFilePath, filePath))
{
  stop("Input file has no .dbf extension; refusing to overwrite it: ", filePath, call. = FALSE)
}
write.dbf(survey, outFilePath, FALSE) # write new file
有没有办法避免进行两次“as.numeric”转换?
感谢。
答案 0 :(得分:2)
这正是使用 tryCatch 的理想场景:
# Convert a factor to numeric when all of its levels can be coerced without
# a warning; otherwise hand the factor back unchanged.
#
# Args:
#   x: a factor.
#
# Returns:
#   A numeric vector with one value per element of x when coercing the levels
#   raised no warning; otherwise x itself, unmodified.
as.numeric.factor <- function (x) {
  # Coerce the levels exactly once. Any warning raised here means a level
  # could not be parsed as a number. The warning text is deliberately not
  # inspected: R translates its messages, so comparing against the English
  # wording breaks under non-English locales.
  vals <- tryCatch(as.numeric(levels(x)), warning = function(w) NULL)

  if (is.null(vals)) {
    # At least one level is not numeric: return the original factor.
    return(x)
  }

  # The factor's integer codes index the converted levels.
  vals[unclass(x)]
}