以下Rscript脚本可以在Windows上按预期运行。但是,在Linux上使用Rscript命令运行时,它将返回错误:
Error in open.connection(x, "rb") :
  Timeout was reached: Resolving timed out after 10000 milliseconds
Calls: read_html -> read_html.default
Execution halted
代码中已设置 options(timeout= 4000000):
#start
library(data.table)
library(xml2)
library(rvest)
library(stringr)
library(stringi)
library(mailR)
# Daily scraper for the xiaohulu.com platform ranking pages.
#
# For every platform id the script downloads the ranking page, keeps the
# first 16 <dd> values together with the <h2> heading(s), and combines them
# with the row labels read from /tmp/title.csv. The per-platform frames are
# stacked into `crawler` and written to /tmp/results/platform<run date>.csv.
#
# NOTE(review): raising options(timeout) has been observed not to prevent
# read_html() from failing with "Resolving timed out after 10000
# milliseconds", so the URL fetch presumably uses its own connection
# timeout. A *resolve* timeout also points at DNS/network configuration on
# the host -- confirm before relying on this option.
options(timeout = 4000000)

ranktitle <- read.csv("/tmp/title.csv", header = TRUE, stringsAsFactors = FALSE)
platform_ids <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 15)
basicURL <- "http://www.xiaohulu.com/rank/?plat="

# Resolve the dates once so every row and the output file name agree, even
# if the run crosses midnight. The ranking data is labelled with yesterday.
run_date <- Sys.Date()
rank_date <- run_date - 1

# Download one platform page and return its ranking rows as a data frame
# with columns: datelist, <columns of `titles`>, platform, platformdata.
#
# plat_id   platform id appended to `base_url`
# base_url  URL prefix of the ranking page
# titles    data frame of row labels bound next to the scraped values
# rank_date date stamped on every row
# n_rows    number of leading <dd> values to keep (padded with NA if fewer)
scrape_platform <- function(plat_id, base_url, titles, rank_date, n_rows = 16L) {
  web <- read_html(str_c(base_url, plat_id), encoding = "UTF-8")
  # ranking values
  platformdata <- html_text(html_nodes(web, "dd"))[seq_len(n_rows)]
  # platform name, repeated so it lines up with the ranking values
  platform <- rep(html_text(html_nodes(web, "h2")), n_rows)
  data.frame(
    datelist = rep(rank_date, n_rows),
    titles,
    platform = platform,
    platformdata = platformdata
  )
}

# Collect one frame per platform and bind them in a single step instead of
# growing `crawler` with rbind() on every iteration.
pages <- lapply(
  platform_ids,
  scrape_platform,
  base_url = basicURL,
  titles = ranktitle,
  rank_date = rank_date
)
crawler <- if (length(pages) > 0) do.call(rbind, pages) else data.frame()

# write file
csvFileName <- paste0("/tmp/results/platform", run_date, ".csv")
# /tmp may be cleaned between runs; make sure the target directory exists.
dir.create(dirname(csvFileName), showWarnings = FALSE, recursive = TRUE)
write.csv(crawler, file = csvFileName)
虽然代码已设置了超时选项,但为什么仍然会超时?为什么相同的代码可以在Windows上正确运行?如何解决这个问题?
顺便说一句,该脚本一周前在同一台Linux机器上运行正常。
title.csv 的内容:
title
礼物价值
礼物价值
礼物价值
礼物价值-1
弹幕条数
弹幕条数
弹幕条数
弹幕条数-1
<!--
  Cascade definition rooted at the clm_bankingrelationship entity.
  Each cascadingentity carries a fetchXML query; values written in braces
  (e.g. {clm_agreement.clm_agreementid}) name an attribute of an enclosing
  entity. NOTE(review): they are presumably substituted by the consuming
  tool before the query is run; confirm against that tool.
  Hierarchy:
    clm_bankingrelationship
      clm_agreement
        clm_partneragreementbanking
        account
      clm_waiver
-->
<rootentity name="clm_bankingrelationship">
  <cascadingentities>
    <cascadingentity name="clm_agreement">
      <fetchXML>
        <fetch distinct="true">
          <entity name="clm_agreement">
            <attribute name="clm_agreementid" />
            <filter type="and">
              <condition attribute="clm_agreementid" operator="eq" value="{clm_bankingrelationship.clm_agreementid}" />
            </filter>
          </entity>
        </fetch>
      </fetchXML>
      <cascadingentities>
        <cascadingentity name="clm_partneragreementbanking">
          <fetchXML>
            <fetch distinct="true">
              <entity name="clm_partneragreementbanking">
                <attribute name="clm_partneragreementbankingid" />
                <link-entity name="clm_role" from="clm_roleid" to="clm_roleid" link-type="inner" />
                <filter type="and">
                  <condition attribute="clm_agreementid" operator="eq" value="{clm_agreement.clm_agreementid}" />
                </filter>
              </entity>
            </fetch>
          </fetchXML>
        </cascadingentity>
        <cascadingentity name="account">
          <fetchXML>
            <fetch distinct="true">
              <entity name="account">
                <attribute name="accountid" />
                <link-entity name="clm_partneragreementbanking" from="clm_partnerid" to="accountid" link-type="inner" />
                <filter type="and">
                  <condition entityname="clm_partneragreementbanking" attribute="clm_agreementid" operator="eq" value="{clm_agreement.clm_agreementid}" />
                  <condition attribute="clm_mainpartnertypecd" operator="neq" value="858000005" />
                </filter>
              </entity>
            </fetch>
          </fetchXML>
        </cascadingentity>
      </cascadingentities>
    </cascadingentity>
    <cascadingentity name="clm_waiver">
      <fetchXML>
        <fetch distinct="true">
          <entity name="clm_waiver">
            <attribute name="clm_waiverid" />
            <filter type="and">
              <condition attribute="clm_bankingrelationshipid" operator="eq" value="{clm_bankingrelationship.clm_bankingrelationshipid}" />
            </filter>
          </entity>
        </fetch>
      </fetchXML>
    </cascadingentity>
  </cascadingentities>
</rootentity>