如何解析R中的几个XML

时间:2013-06-24 20:04:26

标签: xml r

借助@SchaunW帮助,我能够找出How to parse XML to R data frame

但是在我的数据中,我需要解析多个XML数据,我的代码如下,代码运行良好的第一对几个站,但如果运行整个500个站,错误弹出:

 "Error in temps.i[sapply(temps.i, function(x) any(unlist(x) == "hourly"))] : 
  invalid subscript type 'list'":

请帮助,谢谢!

data.all = data.frame() 
lat = data.0$lat 
lon = data.0$lon 
head(data.0)
station_id  LocID   lat    lon
   10001    11694  32.82  -86.65
   10079   089214  27.65  -80.23 (node 'temperature' not exit in XML)

data.loop <- lapply(1:length(data.0$station_id), function(i) {
urls.i <- paste("http://forecast.weather.gov/MapClick.php?lat=",lat[i],"&lon=",lon[i],"&FcstType=digitalDWML",sep="")
data.i <- xmlParse(urls.i)
xml_data.i <- xmlToList(data.i)
location.i <- as.list(xml_data.i[["data"]][["location"]][["point"]])
start_time.i <- unlist(xml_data.i[["data"]][["time-layout"]][names(xml_data.i[["data"]][["time-layout"]]) == "start-valid-time"])
temps.i <- xml_data.i[["data"]][["parameters"]]
temps.i <- temps.i[names(temps.i) == "temperature"]
temps.i <- temps.i[sapply(temps.i, function(x) any(unlist(x) == "hourly"))]
temps.i <- unlist(temps.i[[1]][sapply(temps.i, names) == "value"])
data1.i <- data.frame(as.list(location.i), "hh" = start_time.i, "Temp" = temps.i)
 })

data.all <- as.data.frame(do.call(rbind, data.loop))

1 个答案:

答案 0 :(得分:0)

我尝试重新格式化以简化代码并添加测试,如果有温度矢量:

data.0 <- read.table(text='station_id  LocID   lat    lon
                 10001    11694  32.82  -86.65
                 10079   089214  27.65  -80.23',header=TRUE)
library(XML)

res <- apply(data.0,1, function(row) {
  tryCatch({
  url <- paste("http://forecast.weather.gov/MapClick.php?lat=",
               row['lat'],"&lon=",
               row['lon'],"&FcstType=digitalDWML",sep="")
    doc <- xmlParse(url)
    data <- xmlToList(doc)$data
    location <- data$location$point
    start_valid_time <- data$`time-layout`$`start-valid-time`
    if( "temperature" %in% names(data$parameters)){
      templ <- data$parameters$temperature
      temps <- as.numeric(unlist(lapply(seq_along(templ),
                                        function(x)templ[x]$value)))
      }else
        temps <- NA
        data.frame(as.list(location), hh = start_valid_time, Temps = temps)
    },error = function(e)data.frame(row['lat'],row['lon'],temps = NA))

  })

do.call(rbind,res)