Question

我有一个数据表（npi1_list），其中包含 ID 号。我想通过网页抓取，用 npi 列表中的 ID 号到网站上查询匹配的记录，并把这些记录提取出来。

library("rvest")
library("data.table")    
# Scrape one page per NPI number (rows 8000..200000 of npi1_list) and stack
# the extracted cell texts row-wise into `final`.
final <- NULL

# CSS selector for the cells to extract: the same cell pattern under either
# table:nth-child(8) or table:nth-child(6).
selector <- paste(
  "table:nth-child(8) tr:nth-child(1) td~ td+ td",
  "table:nth-child(6) tr:nth-child(1) td~ td+ td",
  sep = ", "
)

for (i in 8000:200000) {
  # Build the URL from a single-line literal; a string literal that wraps
  # across source lines would embed a newline and indentation in the URL.
  url <- paste0(
    "http://www.npinumberlookup.org/getResultDetails.php?npinum=",
    npi1_list[i, 1]
  )

  # NOTE: there is no error handling here, so any read_html() failure
  # (e.g. a connection timeout) aborts the loop at index i.
  webpage <- read_html(url)
  name_nodes <- html_nodes(webpage, selector)
  rank_data <- html_text(name_nodes)

  # Append this record as a new row of the result.
  final <- rbind(final, rank_data)
  print(i)

  # Pause between requests to avoid hammering the server.
  Sys.sleep(1)
}

这段代码可以正常工作，但有时会报连接超时（80 端口）错误，这时我必须把起始位置改为循环中断的那一行，然后重新运行 for 循环。如何在上面的 for 循环中使用 try/catch，使它能够自动运行直到第 200000 行？

Answer 1

library("rvest")
library("data.table")    
# Scrape one page per NPI number (rows 8000..200000 of npi1_list), retrying
# failed requests, and stack the extracted cell texts row-wise into `final`.
final <- NULL

# Upper bound on fetch attempts per row, so that an ID which fails every time
# cannot stall the whole job in an endless retry loop.
max_attempts <- 5L

# CSS selector for the cells to extract: the same cell pattern under either
# table:nth-child(8) or table:nth-child(6).
selector <- paste(
  "table:nth-child(8) tr:nth-child(1) td~ td+ td",
  "table:nth-child(6) tr:nth-child(1) td~ td+ td",
  sep = ", "
)

for (i in 8000:200000) {
  # Build the URL from a single-line literal; a string literal that wraps
  # across source lines would embed a newline and indentation in the URL.
  url <- paste0(
    "http://www.npinumberlookup.org/getResultDetails.php?npinum=",
    npi1_list[i, 1]
  )

  # NULL means "not fetched yet"; a successful fetch always yields a
  # character vector (possibly empty), never NULL.
  rank_data <- NULL
  for (attempt in seq_len(max_attempts)) {
    rank_data <- tryCatch({
      webpage <- read_html(url)
      html_text(html_nodes(webpage, selector))
    }, error = function(e) {
      print(e)
      print(paste0('connection error on ', i))
      NULL
    })

    # Pause after every request, successful or not, before the next one.
    Sys.sleep(1)
    if (!is.null(rank_data)) {
      break
    }
  }

  if (is.null(rank_data)) {
    # Every attempt failed: report the row and continue with the next one
    # instead of retrying forever.
    warning(
      "giving up on row ", i, " after ", max_attempts, " attempts",
      call. = FALSE
    )
    next
  }

  # Append this record as a new row of the result.
  final <- rbind(final, rank_data)
  print(i)
}

在 R 中进行网页抓取时使用 try/catch

1 个答案: