Question

我正在制作一个与opendata.socrata.com的api接口的R包。

我遇到了一个问题，并且已经追查到它与RCurl包的构建方式有关。

在Windows上，RCurl是使用OpenSSL构建的，我没有遇到任何问题；但在Linux上，RCurl是使用GnuTLS构建的，该函数无法正常工作。

您可以使用 curlVersion()$ssl_version 来检查RCurl是用哪个SSL库构建的。

这是该函数：

#' Search dataset views on opendata.socrata.com
#'
#' Builds a query URL for the Socrata views API, fetches it over HTTPS with
#' RCurl (verifying the peer against a CA bundle) and returns the result.
#'
#' Side effects: reads/writes `cookiefile.txt` in the working directory via
#' the curl cookie options, may download a CA bundle, and stores the request
#' URL in the global variable `search.Socrata.Call`.
#'
#' @param search Free-text search term, sent as the `full` query parameter.
#' @param topic Term sent as the `description` query parameter.
#' @param name Term sent as the `name` query parameter.
#' @param tags Tag sent as the `tags` query parameter.
#' @param category One of 'Business', 'Fun', 'Personal', 'Education',
#'   'Government', or `NULL` to omit the parameter.
#' @param count If `TRUE`, `count=TRUE` is added to the query.
#' @param limit Number of results requested; values above 200 are sent as 200.
#' @param page Page number sent as the `page` query parameter.
#' @param type Response format, "json" or "xml".
#' @return For `type = "json"` a data frame with one row per returned record
#'   (an empty data frame when nothing is returned); otherwise the raw
#'   response text.
search.Socrata.Views <- function(search = NULL, ## full
                                 topic = NULL, ## description
                                 name = NULL, ## title field search
                                 tags = NULL,
                                 category = NULL,
                                 count = FALSE,
                                 limit = 10, ## max 200
                                 page = 1,
                                 type = "json" ## can also be xml
){

  ## Validate the enumerated arguments before doing any I/O.
  if (!is.null(category)) {
    category <- match.arg(category, c('Business', 'Fun', 'Personal', 'Education', 'Government'))
  }
  type <- match.arg(type, c('json', 'xml'))

  ## library() stops with an error when a needed package is missing, whereas
  ## require() only returns FALSE and lets the failure surface later.
  library('RCurl')
  if (type == 'json') {
    library('rjson')
  }
  ## XML is not called anywhere in this function; it stays a soft dependency
  ## so that a missing XML package does not start breaking existing callers.
  require('XML')

  ## Locate the CA bundle shipped with RCurl. system.file() returns "" when
  ## the file is absent, which is not a usable download destination, so fall
  ## back to a per-session file in that case.
  cainfo <- system.file("CurlSSL", "ca-bundle.crt", package = "RCurl")
  if (!nzchar(cainfo)) {
    cainfo <- file.path(tempdir(), "ca-bundle.crt")
  }

  ## Refresh the bundle when it is missing or older than 2 weeks. It is
  ## fetched over HTTPS: these certificates are the trust anchors used by
  ## ssl.verifypeer, so they must not travel over plain HTTP.
  ca_url <- 'https://curl.se/ca/cacert.pem'
  ca_is_stale <- !file.exists(cainfo) ||
    as.numeric(Sys.time() - file.info(cainfo)[["mtime"]], units = "days") > 14
  if (ca_is_stale) {
    download.file(ca_url, cainfo)
  }

  ## Create the curl handle only once the CA bundle is known to be in place.
  ## capath doesn't work:: NEED cainfo!
  cookie <- 'cookiefile.txt'
  curl <- getCurlHandle(cookiefile = cookie,
                        cookiejar = cookie,
                        useragent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en - US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6",
                        header = FALSE,
                        verbose = TRUE,
                        netrc = FALSE,
                        maxredirs = as.integer(20),
                        followlocation = TRUE,
                        ssl.verifypeer = TRUE,
                        cainfo = cainfo,
                        timeout = 100)
  ## Drop the handle and collect garbage on every exit path, including
  ## errors (presumably so the handle is released promptly - confirm).
  on.exit({
    rm(curl)
    gc()
  }, add = TRUE)

  ## Build one "&key=value" fragment, or NULL when the argument was not
  ## supplied. reserved = TRUE also escapes characters such as & = # so a
  ## value can never break out of its own parameter.
  query_part <- function(key, value) {
    if (is.null(value)) {
      return(NULL)
    }
    paste0('&', key, '=', URLencode(as.character(value), reserved = TRUE))
  }

  ### Make URL
  baseSocrataUrl <- 'https://opendata.socrata.com/api/views.'
  count_part <- if (isTRUE(count)) '&count=TRUE' else NULL

  ## NULL fragments are zero-length and simply vanish in paste0().
  SocrataUrl <- paste0(baseSocrataUrl, type, '?',
                       paste0('&page=', page),
                       query_part('tags', tags),
                       query_part('category', category),
                       paste0('&limit=', min(limit, 200)),
                       query_part('full', search),
                       query_part('name', name),
                       query_part('description', topic),
                       count_part)

  ### Retrieving html
  SocrataHtml <- getURL(SocrataUrl, curl = curl)
  ## Expose the URL that was requested for inspection by the caller.
  assign('search.Socrata.Call', SocrataUrl, envir = .GlobalEnv)

  if (type != 'json') {
    return(SocrataHtml)
  }

  records <- fromJSON(SocrataHtml)
  if (length(records) == 0) {
    return(data.frame())
  }

  ## Flatten every record into its own data frame; nested fields become
  ## separate columns.
  records <- lapply(records, function(x) {
    data.frame(x, stringsAsFactors = FALSE)
  })

  ## Allocate the result using the widest record's columns as the template.
  widths <- vapply(records, length, integer(1))
  SocrataTable.df <- data.frame(matrix(nrow = length(records), ncol = max(widths)))
  names(SocrataTable.df) <- names(records[[which.max(widths)]])

  for (i in seq_along(records)) {
    for (field in names(records[[i]])) {
      ## Each per-record frame describes a single record, so its values are
      ## in row 1. Indexing it by the record number i yields NA for every
      ## record after the first.
      SocrataTable.df[i, field] <- records[[i]][1, field]
    }
  }

  SocrataTable.df
}

使用以下命令运行该函数：

## Example call: `topic` is sent as the `description` query parameter and
## `type` defaults to "json", so the value printed is the data frame built
## from the JSON response.
socrata.views <- search.Socrata.Views(topic = 'airplane')
print(socrata.views)

Answer 1

我没有在Linux下测试过您的代码，但可以说您手工拼接URL的方式过于繁琐，而且容易出错。使用getForm可以大大简化代码。

## Collect the query parameters in a named list. Arguments left at NULL are
## dropped by the Filter() call below, so no per-argument if/else is needed.
params <- list(
  category = category,
  tags     = tags,
  limit    = min(limit, 200)
  #etc.
)
params <- Filter(Negate(is.null), params)
## baseSocrataUrl ends in "views.", so the response format has to be appended
## to form the endpoint ("views.json" / "views.xml"); getForm() then adds and
## escapes the query string itself.
getForm(paste0(baseSocrataUrl, type), .params = params, curl = curl)

如何让此函数的SSL连接在使用GnuTLS构建的RCurl下正常工作

1 个答案: