无法附加到数据框

时间:2013-05-28 16:38:37

标签: r

这是我的代码,用于提取xml文件,解析它,然后将解析后的xml文件追加到名为new_df的数据框中。 我注意到new_df是空的。它不应该是空的。我无法弄清楚,任何想法都非常感激。

# NOTE(review): RODBC is attached but nothing in the visible script calls it.
library(RODBC)
# XML supplies xmlParse() and xmlToList(), both used below.
library(XML)
# NOTE(review): setInternet2() is presumably the Windows-only switch from older
# R releases -- confirm it is still available and needed.
setInternet2(TRUE)
# Raise R's `timeout` option to 200.
options(timeout=200)

# Agent ids and slot ids; the nested loops below visit every agent/slot pair.
agentlist<-c(45693,42627,42483,42432,40187)
slotlist<-c(1135863,1145649,1135859,1135844)

# Only slots 1135863 and 1145649 are handled; the other two slot ids match
# neither branch, so nothing is done for them.
for (i in 1:length(agentlist)) {
    for(j in 1:length(slotlist)) {
      if(slotlist[j]==1135863) {
        # Slot 1135863: request transaction pages 1 to 8, one URL per page.
        for (k in 1:8){
              # Build the request URL piece by piece:
              # <endpoint><slot><query><slot>:<page><query><agent>
              url1<-c("http://api.keynote.com/keynote/api/getgraphdata?api_key=<key>&&slotidlist=")
              url<-paste0(url1, slotlist[j])
             url2<-c("&graphtype=time&timemode=relative&relativehours=432000&timezone=est&bucket=300&averagemethod=GM&transpagelist=")
        url<-paste0(url, url2)
        url<-paste0(url, slotlist[j])
        url<-paste(url, k, sep=':')
        url3<-c("&pagecomponent=u&format=xml&agentlist=")
        url<-paste0(url, url3)
        url<-paste0(url, agentlist[i])
        # Everything from the download to the rbind() runs inside tryCatch(),
        # so an error at ANY of these steps ends up in the handler below.
        tryCatch({
            # Each request overwrites the same local file.
            download.file(url, destfile='test1.xml')
            xml_file <- xmlParse("test1.xml")    # Parse the XML
            xml_file <- xmlToList(xml_file)   

            # Keep only the top-level "measurement" entries, and inside each
            # one only the elements whose name contains "alias" or "bucket".
            xml_file <- xml_file[names(xml_file) == "measurement"]
            xml_file <- lapply(xml_file, function(x) x[grep("alias|bucket", names(x))])

            # One matrix per measurement: a row per bucket, prefixed with the
            # measurement's alias.
            xml_file <- lapply(xml_file, function(x) {
            alias <- x$alias
            buckets <- t(sapply(x$bucket_data, unlist))
            cbind("alias" = alias, buckets)
        })

        # Stack the per-measurement matrices and convert to a data frame.
        xml_file <- do.call("rbind", xml_file)

        xml_file <- data.frame(xml_file, stringsAsFactors = FALSE)

        # Columns are picked by position, then renamed and reordered.
        # NOTE(review): positions 1, 3, 7, 9 presumably match the API's bucket
        # layout -- confirm against a real response.
        x<-xml_file[,c(1,3,7,9)]
        colnames(x)<-c("SITE", "AVAIL_PERCENT", "RESPONSE_TIME", "DATE_TIME")
        x<-x[,c("DATE_TIME", "SITE", "RESPONSE_TIME", "AVAIL_PERCENT")]
                     x$AGENT<-c(agentlist[i])

        # NOTE(review): `temp` is never initialised in this branch (the other
        # branch creates an empty data frame first). Unless `temp` already
        # exists from elsewhere, this rbind() fails and the handler below
        # reports it as a download problem. DATE_TIME is also left unconverted
        # here, unlike in the other branch.
        temp<-rbind(x, temp)
        },error = function(e) {
        # The condition `e` is ignored: every failure prints the same text.
        print("unable to download the url")
        })
        }
        # new_df is replaced (not appended to) with whatever temp holds now.
        new_df<-temp 
                     print(new_df) ###empty data frame




     }else if(slotlist[j]==1145649){
        # Slot 1145649: start from an empty, typed data frame and request
        # transaction pages 1 to 6. temp is reset here on every visit, so rows
        # collected for a previous agent are dropped.
        temp<-data.frame(DATE_TIME=as.POSIXct(character()), SITE=character(), RESPONSE_TIME=as.numeric(character()), AVAIL_PERCENT=as.numeric(character()), AGENT=as.numeric(character()))
        for(l in 1:6) {
            # Same URL construction as in the first branch, with page index l.
            url1<-c("http://api.keynote.com/keynote/api/getgraphdata?api_key=<key>&&slotidlist=")
            url<-paste0(url1, slotlist[j])
            url2<-c("&graphtype=time&timemode=relative&relativehours=432000&timezone=est&bucket=300&averagemethod=GM&transpagelist=")
            url<-paste0(url, url2)
            url<-paste0(url, slotlist[j])
            url<-paste(url, l, sep=':')
            url3<-c("&pagecomponent=u&format=xml&agentlist=")
            url<-paste0(url, url3)
            url<-paste0(url, agentlist[i])
            # As above, any error between the download and the rbind() is
            # caught, and temp then keeps its previous contents.
            tryCatch({
                    download.file(url, destfile='test1.xml')

                    xml_file <- xmlParse("test1.xml")    # Parse the XML
                    xml_file <- xmlToList(xml_file)   

                    # Same filtering and flattening as in the first branch.
                    xml_file <- xml_file[names(xml_file) == "measurement"]
                    xml_file <- lapply(xml_file, function(x) x[grep("alias|bucket", names(x))])

                    xml_file <- lapply(xml_file, function(x) {
                      alias <- x$alias
                      buckets <- t(sapply(x$bucket_data, unlist))
                      cbind("alias" = alias, buckets)
                    })

                    xml_file <- do.call("rbind", xml_file)

                    xml_file <- data.frame(xml_file, stringsAsFactors = FALSE)

                    x<-xml_file[,c(1,3,7,9)]
                    colnames(x)<-c("SITE", "AVAIL_PERCENT", "RESPONSE_TIME", "DATE_TIME")
                    x<-x[,c("DATE_TIME", "SITE", "RESPONSE_TIME", "AVAIL_PERCENT")]

                    #x$Site<-gsub('.*\\-(.*)\\-.*', '\\1', x$Site)

                    #x$DateTime<-gsub("AM", "", x$DateTime)
                    #x$DateTime<-gsub("PM", "", x$DateTime)
                    # Only this branch converts DATE_TIME to POSIXct.
                    # NOTE(review): as.POSIXct() yields NA for strings that do
                    # not match the format -- confirm "%Y-%B-%d %H:%M" against
                    # the actual timestamps.
                    x$DATE_TIME<-as.POSIXct(x$DATE_TIME, format="%Y-%B-%d %H:%M")
                    #x$Response_Time<-as.numeric(x$Response_Time)
                    #x$Avail_Percent<-as.numeric(x$Avail_Percent)
                    x$AGENT<-c(agentlist[i])
                    # New rows are placed in front of the rows gathered so far.
                    temp<-rbind(x, temp)
                },error = function(e) {
                    # The condition `e` is ignored: every failure prints the
                    # same text, whatever actually went wrong.
                    print("unable to download the url")
            })      
        }
        # new_df is replaced (not appended to) with this agent's rows only.
        new_df<-temp
                     print(new_df) ### empty data frame

    }
}
}

我注意到new_df数据框是空的。它不应该是空的。有什么想法吗?

1 个答案:

答案 0 :(得分:2)

您应该把代码重构成若干个小函数,这样可以单独测试每个函数,代码也会更清晰。然后,您应该使用apply系列函数(lapply、mapply等)来避免for循环带来的副作用(全局变量之间的相互影响)。例如,我会这样做:

  1. 创建网址的函数
  2. 解析网址并创建data.frame的函数
  3. 调用前两个函数的函数...

生成网址:

    # Assemble the Keynote getgraphdata request URL for one transaction page.
    #   slot  - slot id, used both as slotidlist and as transpagelist prefix
    #   agent - agent id appended as agentlist
    #   i     - page index, joined to the slot id with ":"
    # Returns the URL as a character vector.
    create.URL <- function (slot, agent,i) {
      endpoint <- "http://api.keynote.com/keynote/api/getgraphdata?api_key=<key>&&slotidlist="
      # Everything up to and including the second copy of the slot id.
      up_to_slot <- paste0(endpoint, slot,
                           "&graphtype=time&timemode=relative&relativehours=",
                           "432000&timezone=est&bucket=300&averagemethod=GM&transpagelist=",
                           slot)
      # transpagelist takes the form <slot>:<page>.
      with_page <- paste(up_to_slot, i, sep=':')
      paste0(with_page, "&pagecomponent=u&format=xml&agentlist=", agent)
    }
    

    解析文件:

    # Download the XML document at `url`, flatten its "measurement" entries
    # into a data frame and tag every row with `agent`.
    #   url        - address handed to download.file()
    #   agent      - value stored in the AGENT column
    #   coerceDate - when TRUE, DATE_TIME is converted with as.POSIXct()
    # Returns a data frame with columns DATE_TIME, SITE, RESPONSE_TIME,
    # AVAIL_PERCENT and AGENT. The download is written to 'test1.xml' in the
    # working directory.
    parse.URL <- function(url,agent,coerceDate=FALSE){
      download.file(url, destfile='test1.xml')
      doc <- xmlToList(xmlParse("test1.xml"))

      # Only the top-level entries named "measurement" are of interest.
      measurements <- doc[names(doc) == "measurement"]

      # Per measurement: keep the alias/bucket elements, then build a matrix
      # with one row per bucket, prefixed by the measurement's alias.
      per_measurement <- lapply(measurements, function(m) {
        m <- m[grep("alias|bucket", names(m))]
        site <- m$alias
        bucket_rows <- t(sapply(m$bucket_data, unlist))
        cbind("alias" = site, bucket_rows)
      })

      stacked <- do.call("rbind", per_measurement)
      tbl <- data.frame(stacked, stringsAsFactors = FALSE)

      # Columns are selected by position, renamed, then put in report order.
      out <- tbl[,c(1,3,7,9)]
      colnames(out) <- c("SITE", "AVAIL_PERCENT", "RESPONSE_TIME", "DATE_TIME")
      out <- out[,c("DATE_TIME", "SITE", "RESPONSE_TIME", "AVAIL_PERCENT")]

      if (coerceDate) {
        out$DATE_TIME <- as.POSIXct(out$DATE_TIME, format="%Y-%B-%d %H:%M")
      }

      out$AGENT <- agent
      out
    }
    

    要调用前两个函数:

    # Fetch and parse every transaction page for one agent/slot pair.
    #   agent - agent id, passed on to create.URL() and parse.URL()
    #   slot  - slot id; 1135863 has pages 1 to 8, 1145649 has pages 1 to 6
    # Returns a list with one parse.URL() result per page, or NULL when `slot`
    # is neither of the two handled ids. Dates are coerced only for 1145649.
    creat.dat <- function(agent,slot){
      res <- NULL
      if(slot==1135863) {
        res <- lapply(1:8,function(k){
          # The original passed the undefined name `solt` here, so this
          # branch always failed with "object 'solt' not found".
          url <- create.URL(slot,agent,k)
          parse.URL(url,agent)
        })
      }else if(slot==1145649){
        res <- lapply(1:6,function(l){
          url <- create.URL(slot,agent,l)
          parse.URL(url,agent,coerceDate=TRUE)
        })
      }
      res
    }
    

    最后,要把以上函数全部调用起来,您可以使用mapply来替换那两个for循环:

    # mapply() walks its arguments in parallel (recycling the shorter one), so
    # passing agentlist and slotlist directly would not visit every agent/slot
    # pair the way the two nested for loops do. Build the full grid first;
    # slot varies fastest, matching the original loop order.
    combos <- expand.grid(slot = slotlist, agent = agentlist)
    # SIMPLIFY = FALSE keeps one list element per agent/slot pair.
    mapply(creat.dat, combos$agent, combos$slot, SIMPLIFY = FALSE)