我在 R 中使用 ajdamico 的 lodown 程序包。首先我想说,这个程序包非常棒,感谢 Anthony。在过去成功运行过脚本之后,我现在遇到了两个问题:
下面是完整的脚本(尚未应用任何取子集操作):
# Build the one-row catalog describing the NPPES data dissemination download.
#
# Arguments:
#   data_name   dataset identifier; not used inside this function.
#   output_dir  directory prefix for the `output_filename` column.
#   ...         ignored.
#
# Returns a data.frame with one row and two character columns:
#   full_url         url of the zipped dissemination file found on the page
#   output_filename  `output_dir` followed by "/nppes.csv"
#
# Stops with an error when the page contains no recognizable link, instead of
# returning a url that ends in "NA" or embeds a whole line of html.
get_catalog_nppes <-
  function(data_name = "nppes", output_dir, ...) {

    # read in the whole NPI files page
    npi_page <-
      suppressWarnings(readLines("http://download.cms.gov/nppes/NPI_Files.html"))

    # keep only the lines that mention the data dissemination file
    link_lines <- grep("NPPES_Data_Dissemination_", npi_page, value = TRUE)

    if (length(link_lines) == 0) {
      stop(
        "no NPPES_Data_Dissemination_ link found on the NPI files page",
        call. = FALSE
      )
    }

    # pull the zipped file's name out of the first matching line
    zip_name <-
      gsub(
        "(.*)(NPPES_Data_Dissemination_.*\\.zip)(.*)$",
        "\\2",
        link_lines[1]
      )

    # when the pattern does not match, gsub() hands back the line untouched
    if (!grepl("^NPPES_Data_Dissemination_.*\\.zip$", zip_name)) {
      stop(
        "could not extract a .zip file name from the NPI files page",
        call. = FALSE
      )
    }

    data.frame(
      full_url = paste0("http://download.cms.gov/nppes/", zip_name),
      output_filename = paste0(output_dir, "/nppes.csv"),
      stringsAsFactors = FALSE
    )
  }
# Download the zipped NPPES file named in `catalog`, extract it, and copy the
# data csv to `catalog$output_filename`.
#
# Arguments:
#   data_name    dataset identifier; not used inside this function.
#   catalog      one-row data.frame with `full_url` and `output_filename`.
#   path_to_7za  7-zip executable used for extraction on non-windows systems.
#   ...          ignored.
#
# Returns `catalog` with a `case_count` column added: the number of lines in
# the extracted csv minus one.
#
# Stops with an error when the catalog is not exactly one row, 7-zip is not
# usable (non-windows), the download or extraction fails, the archive does not
# yield exactly one data csv, or the csv cannot be copied to its destination.
# If the function fails, the catalog is printed on the way out.
lodown_nppes <-
  function(data_name = "nppes", catalog, path_to_7za = "7za", ...) {

    finished <- FALSE
    tf <- tempfile()
    # private extraction directory: keeps unrelated files in tempdir() out of
    # both the csv search and the cleanup
    extract_dir <- tempfile("nppes_unzip_")

    on.exit({
      # always remove the downloaded archive and everything extracted from it
      unlink(c(tf, extract_dir), recursive = TRUE)
      # on failure, show which catalog record was being processed
      if (!finished) print(catalog)
    })

    if (nrow(catalog) != 1) stop("nppes catalog must be exactly one record")

    on_windows <- .Platform$OS.type == "windows"

    if (!on_windows && system(paste0(shQuote(path_to_7za), " -h")) != 0) {
      stop("you need to install 7-zip. if you already have it, include a path_to_7za='/directory/7za' parameter")
    }

    if (download.file(catalog$full_url, tf, mode = "wb") != 0) {
      stop("download of ", catalog$full_url, " failed")
    }

    dir.create(extract_dir)

    # extract the file, platform-specific
    if (on_windows) {
      unzipped_files <- unzip_warn_fail(tf, exdir = extract_dir)
    } else {
      # build the string to send to the terminal on non-windows systems
      dos.command <-
        paste0(
          shQuote(path_to_7za), " x ", shQuote(tf), " -o", shQuote(extract_dir)
        )
      if (system(dos.command) != 0) {
        stop("7-zip could not extract ", catalog$full_url)
      }
      unzipped_files <-
        list.files(extract_dir, full.names = TRUE, recursive = TRUE)
    }

    # the data file is a csv whose name does not mark it as a header-only file
    file_names <- basename(unzipped_files)
    is_csv <- grepl("\\.csv$", file_names, ignore.case = TRUE)
    is_header <- grepl("FileHeader", file_names, ignore.case = TRUE)
    csv.file <- unzipped_files[is_csv & !is_header]

    # NOTE(review): newer archives appear to ship auxiliary csv files next to
    # the main provider file, whose name presumably contains "npidata" --
    # confirm against the current CMS archive layout.
    if (length(csv.file) > 1) {
      main_file <- grepl("npidata", basename(csv.file), ignore.case = TRUE)
      if (sum(main_file) == 1) csv.file <- csv.file[main_file]
    }

    if (length(csv.file) != 1) {
      stop(
        "expected exactly one data csv in the nppes archive, found ",
        length(csv.file),
        if (length(csv.file) > 0) paste0(": ", paste(basename(csv.file), collapse = ", "))
      )
    }

    dir.create(
      dirname(catalog$output_filename),
      showWarnings = FALSE,
      recursive = TRUE
    )

    # overwrite so the file on disk always matches the case_count stored below
    if (!file.copy(csv.file, catalog$output_filename, overwrite = TRUE)) {
      stop("could not copy the nppes csv to ", catalog$output_filename)
    }

    # every line except the header is one record
    catalog$case_count <- R.utils::countLines(csv.file) - 1

    finished <- TRUE

    catalog
  }
## Load dataset ----

# location of the nppes csv file on disk
nppes_csv <- file.path(path.expand("~"), "NPPES", "nppes.csv")

# read the header plus a single record, only to learn the column names
column_names <- names(read.csv(nppes_csv, nrows = 1))

# lowercase the names and replace the dots read.csv() inserted
column_names <- gsub("\\.", "_", tolower(column_names))

# one readr type code per column of the file: 'n' numeric, 'c' character.
# NOTE(review): the exclusion pattern itself contains "npi", so the two
# conditions can never both hold and every column is currently typed 'c'.
# confirm which columns were meant to be numeric before changing this.
column_types <-
  ifelse(
    grepl("npi", column_names) &
      !grepl("npi|codified|first|last|blergh|whistle|rule|exist|orange|way|brown|houses|outrageous|boring|substance|superficial|good|fancy|sneaky|terrific|paint|nasty|gentle|decorous|smile", column_names),
    "n", "c"
  )

# the subset of columns to keep
columns_to_import <-
  c(
    "npi",
    "entity_type_code",
    "provider_business_mailing_address_postal_code",
    "last_update_date",
    "npi_deactivation_reason_code",
    "npi_deactivation_date",
    "npi_reactivation_date",
    "authorized_official_last_name",
    "authorized_official_first_name",
    paste0("healthcare_provider_taxonomy_code_", seq_len(15)),
    "is_sole_proprietor"
  )

# fail early, naming any requested column that the file does not have
missing_columns <- setdiff(columns_to_import, column_names)
if (length(missing_columns) > 0) {
  stop(
    "columns not found in the nppes.csv header: ",
    paste(missing_columns, collapse = ", ")
  )
}

# put the requested columns in the order they appear in the file, so the
# names line up with the non-skipped positions of the type string below
columns_to_import <-
  columns_to_import[order(match(columns_to_import, column_names))]

# compact type string covering every column of the file; '_' skips a column
import_types <-
  paste0(
    ifelse(column_names %in% columns_to_import, column_types, "_"),
    collapse = ""
  )

# NOTE(review): col_names lists only the imported columns while col_types
# covers all of them; confirm the installed readr version pairs the names
# with the non-skipped columns.
nppes <-
  data.frame(
    readr::read_csv(
      nppes_csv,
      col_names = columns_to_import,
      col_types = import_types,
      skip = 1
    )
  )