下面是我的代码:它下载 XML 文件并进行解析,然后把解析结果追加到名为 new_df 的数据框中。但我发现 new_df 是空的,而它不应该为空。我找不出原因,非常感谢任何建议。
# Script: for each agent/slot combination, download Keynote graph data as XML
# (one request per page index), flatten it into a data frame and accumulate the
# rows in `temp`, which is then copied to `new_df` and printed.
library(RODBC)
library(XML)
# NOTE(review): setInternet2() is presumably the Windows-only helper from older
# R releases - confirm it exists in the R version in use.
setInternet2(TRUE)
# Set the `timeout` option to 200.
options(timeout=200)
agentlist<-c(45693,42627,42483,42432,40187)
slotlist<-c(1135863,1145649,1135859,1135844)
# Visit every agent/slot pair; only slots 1135863 and 1145649 are handled below,
# the other two slot ids fall through both branches and do nothing.
for (i in 1:length(agentlist)) {
for(j in 1:length(slotlist)) {
if(slotlist[j]==1135863) {
# NOTE(review): unlike the 1145649 branch, this branch never creates `temp`
# before the loop. On the first pass `temp` only exists if it is already in
# the workspace; on later passes it still holds the rows left over from the
# previous 1145649 branch.
for (k in 1:8){
# Assemble the request URL: slot id, fixed graph options, "<slot>:<k>" as the
# transpagelist entry, then the agent id.
url1<-c("http://api.keynote.com/keynote/api/getgraphdata?api_key=<key>&&slotidlist=")
url<-paste0(url1, slotlist[j])
url2<-c("&graphtype=time&timemode=relative&relativehours=432000&timezone=est&bucket=300&averagemethod=GM&transpagelist=")
url<-paste0(url, url2)
url<-paste0(url, slotlist[j])
url<-paste(url, k, sep=':')
url3<-c("&pagecomponent=u&format=xml&agentlist=")
url<-paste0(url, url3)
url<-paste0(url, agentlist[i])
tryCatch({
# The response is always written to the same file in the working directory.
download.file(url, destfile='test1.xml')
xml_file <- xmlParse("test1.xml") # Parse the XML
xml_file <- xmlToList(xml_file)
# Keep only the top-level "measurement" entries.
xml_file <- xml_file[names(xml_file) == "measurement"]
# Within each measurement keep the elements whose name matches alias/bucket.
xml_file <- lapply(xml_file, function(x) x[grep("alias|bucket", names(x))])
# One matrix per measurement: the alias followed by one row per bucket entry.
xml_file <- lapply(xml_file, function(x) {
alias <- x$alias
buckets <- t(sapply(x$bucket_data, unlist))
cbind("alias" = alias, buckets)
})
xml_file <- do.call("rbind", xml_file)
xml_file <- data.frame(xml_file, stringsAsFactors = FALSE)
# Select columns 1, 3, 7 and 9 by position, rename them, then reorder.
x<-xml_file[,c(1,3,7,9)]
colnames(x)<-c("SITE", "AVAIL_PERCENT", "RESPONSE_TIME", "DATE_TIME")
x<-x[,c("DATE_TIME", "SITE", "RESPONSE_TIME", "AVAIL_PERCENT")]
x$AGENT<-c(agentlist[i])
# NOTE(review): if `temp` does not exist yet this rbind() raises an error,
# which the handler below reports as a download failure.
temp<-rbind(x, temp)
},error = function(e) {
# Every error raised in the block above (download, parsing, column selection,
# rbind) ends up here; the actual message in `e` is discarded.
print("unable to download the url")
})
}
# `new_df` is replaced, not appended to, each time this branch finishes.
new_df<-temp
print(new_df) ###empty data frame
}else if(slotlist[j]==1145649){
# Start from an empty, typed data frame; this discards whatever `temp` held.
temp<-data.frame(DATE_TIME=as.POSIXct(character()), SITE=character(), RESPONSE_TIME=as.numeric(character()), AVAIL_PERCENT=as.numeric(character()), AGENT=as.numeric(character()))
for(l in 1:6) {
# Same URL layout as the other branch, with page index `l`.
url1<-c("http://api.keynote.com/keynote/api/getgraphdata?api_key=<key>&&slotidlist=")
url<-paste0(url1, slotlist[j])
url2<-c("&graphtype=time&timemode=relative&relativehours=432000&timezone=est&bucket=300&averagemethod=GM&transpagelist=")
url<-paste0(url, url2)
url<-paste0(url, slotlist[j])
url<-paste(url, l, sep=':')
url3<-c("&pagecomponent=u&format=xml&agentlist=")
url<-paste0(url, url3)
url<-paste0(url, agentlist[i])
tryCatch({
download.file(url, destfile='test1.xml')
xml_file <- xmlParse("test1.xml") # Parse the XML
xml_file <- xmlToList(xml_file)
# Same flattening steps as in the 1135863 branch.
xml_file <- xml_file[names(xml_file) == "measurement"]
xml_file <- lapply(xml_file, function(x) x[grep("alias|bucket", names(x))])
xml_file <- lapply(xml_file, function(x) {
alias <- x$alias
buckets <- t(sapply(x$bucket_data, unlist))
cbind("alias" = alias, buckets)
})
xml_file <- do.call("rbind", xml_file)
xml_file <- data.frame(xml_file, stringsAsFactors = FALSE)
x<-xml_file[,c(1,3,7,9)]
colnames(x)<-c("SITE", "AVAIL_PERCENT", "RESPONSE_TIME", "DATE_TIME")
x<-x[,c("DATE_TIME", "SITE", "RESPONSE_TIME", "AVAIL_PERCENT")]
#x$Site<-gsub('.*\\-(.*)\\-.*', '\\1', x$Site)
#x$DateTime<-gsub("AM", "", x$DateTime)
#x$DateTime<-gsub("PM", "", x$DateTime)
# Only this branch converts DATE_TIME to POSIXct.
# NOTE(review): values that do not match the format become NA - confirm the
# format against the actual XML.
x$DATE_TIME<-as.POSIXct(x$DATE_TIME, format="%Y-%B-%d %H:%M")
#x$Response_Time<-as.numeric(x$Response_Time)
#x$Avail_Percent<-as.numeric(x$Avail_Percent)
x$AGENT<-c(agentlist[i])
temp<-rbind(x, temp)
},error = function(e) {
# As above: any error is reported with this one message.
print("unable to download the url")
})
}
# Overwrites the `new_df` produced by earlier agent/slot passes.
new_df<-temp
print(new_df) ### empty data frame
}
}
}
我注意到new_df数据框是空的。它不应该是空的。有什么想法吗?
答案 0 :(得分:2)
您应该把代码重构为若干小函数,这样可以单独测试每个函数,代码也会更清晰。然后,使用 xxapply 系列函数(如 lapply、mapply)来避免 for 循环带来的副作用(与全局变量的交互)。例如,我会这样做:
生成网址:
# Build the Keynote getgraphdata request URL for one slot / page / agent.
#
# Args:
#   slot:  Slot id; inserted after "slotidlist=" and again as the prefix of the
#          "transpagelist=" entry.
#   agent: Agent id appended after "agentlist=".
#   i:     Page index, joined to the slot as "<slot>:<i>".
#
# Returns:
#   The assembled URL as a character vector.
create.URL <- function (slot, agent,i) {
  base_part <- "http://api.keynote.com/keynote/api/getgraphdata?api_key=<key>&&slotidlist="
  graph_part <- paste0("&graphtype=time&timemode=relative&relativehours=",
                       "432000&timezone=est&bucket=300&averagemethod=GM&transpagelist=")
  tail_part <- "&pagecomponent=u&format=xml&agentlist="
  # Single concatenation: base, slot, graph options, "<slot>:<i>", tail, agent.
  paste0(base_part, slot, graph_part, slot, ":", i, tail_part, agent)
}
解析文件:
# Download one Keynote graph-data XML document and flatten it to a data frame.
#
# Args:
#   url:        URL to fetch.
#   agent:      Value stored in the AGENT column of every returned row.
#   coerceDate: If TRUE, DATE_TIME is converted with
#               as.POSIXct(format = "%Y-%B-%d %H:%M"); otherwise it is left as
#               parsed from the XML.
#
# Returns:
#   A data frame with columns DATE_TIME, SITE, RESPONSE_TIME, AVAIL_PERCENT and
#   AGENT, one row per bucket entry of every "measurement" element.
#
# Raises:
#   An error if the download reports a non-zero status, or if parsing fails.
parse.URL <- function(url,agent,coerceDate=FALSE){
  # Download to a private temp file instead of a fixed 'test1.xml' in the
  # working directory: a failed download can then never leave a stale file from
  # an earlier call to be parsed as if it were the current response.
  xml_path <- tempfile(fileext = ".xml")
  on.exit(unlink(xml_path), add = TRUE)
  status <- download.file(url, destfile = xml_path)
  if (status != 0) {
    stop("download failed (status ", status, "): ", url, call. = FALSE)
  }

  parsed <- xmlToList(xmlParse(xml_path))
  # Keep only the top-level "measurement" entries.
  measurements <- parsed[names(parsed) == "measurement"]
  # One matrix per measurement: the alias followed by one row per bucket entry.
  per_measurement <- lapply(measurements, function(m) {
    m <- m[grep("alias|bucket", names(m))]
    buckets <- t(sapply(m$bucket_data, unlist))
    cbind("alias" = m$alias, buckets)
  })
  flat <- data.frame(do.call("rbind", per_measurement),
                     stringsAsFactors = FALSE)

  # Columns are picked by position (1, 3, 7, 9), renamed, then reordered.
  # NOTE(review): the positions depend on the layout of the XML - confirm.
  x <- flat[, c(1, 3, 7, 9)]
  colnames(x) <- c("SITE", "AVAIL_PERCENT", "RESPONSE_TIME", "DATE_TIME")
  x <- x[, c("DATE_TIME", "SITE", "RESPONSE_TIME", "AVAIL_PERCENT")]
  if (coerceDate) {
    x$DATE_TIME <- as.POSIXct(x$DATE_TIME, format = "%Y-%B-%d %H:%M")
  }
  x$AGENT <- agent
  x
}
再用一个函数来调用前面这两个函数:
# Fetch and parse every page of one slot for one agent.
#
# Args:
#   agent: Agent id, forwarded to create.URL() and parse.URL().
#   slot:  Slot id. 1135863 is fetched for pages 1..8 without date coercion;
#          1145649 for pages 1..6 with coerceDate = TRUE.
#
# Returns:
#   A list with one parse.URL() result per page, or NULL for any other slot.
creat.dat <- function(agent,slot){
  if (slot == 1135863) {
    pages <- 1:8
    coerce_date <- FALSE
  } else if (slot == 1145649) {
    pages <- 1:6
    coerce_date <- TRUE
  } else {
    # Slots without a known page count are skipped.
    return(NULL)
  }
  lapply(pages, function(page) {
    # The original first branch passed the misspelled, undefined `solt` here,
    # which raised "object 'solt' not found".
    url <- create.URL(slot, agent, page)
    parse.URL(url, agent, coerceDate = coerce_date)
  })
}
最后,要把这些函数串起来,可以用 mapply 替换那两个 for 循环:
# mapply() pairs its arguments element-wise and recycles the shorter one, so
# passing agentlist and slotlist directly does not visit every agent/slot
# combination the way the nested for loops did. Build the full grid first;
# slot varies fastest, matching the loop order (agent outer, slot inner).
combos <- expand.grid(slot = slotlist, agent = agentlist)
# SIMPLIFY = FALSE keeps one list element per combination (a list of data
# frames, or NULL for unhandled slots) instead of letting mapply() reshape it.
mapply(creat.dat, combos$agent, combos$slot, SIMPLIFY = FALSE)