我可以一次成功抓取一个id。作为R的新手,也许把它们分别抓取再合并起来更简单,但我真的想知道能否写一个循环来自动完成。各个id对应的页面格式相同,只是返回结果的长度不同(在这种情况下这不是问题)。
如果我只抓一个id,这是我的代码:
'
S1的结构如下:
library(XML)
library(rvest)
library(plyr)
library(dplyr)
library(httr)
library(data.table)
library(pipeR)
library(xml2)

# Submit the query form for a single id ("S1") and flatten the XML response
# into a data frame with one row per <schedule> entry.
url <- "XXXXXXXXXXX"
session <- html_session(url)
form <- html_form(read_html(url))[[1]]
filled_form <- set_values(
  form,
  "id" = "S1",
  "start" = "2017-01-17",
  "end" = "2017-02-03",
  "Password" = "lll"
)
s <- submit_form(session, filled_form)

# Parse the response into a nested list and keep only the scheduleList node.
z <- read_xml(s$response)
z1 <- as_list(z)
z2 <- z1[which(names(z1) == "scheduleList")]

schedule_list <- z2[[1]]
# The first child is the <teacher> node; every following child is a <schedule>.
teacher_node <- schedule_list[[1]]

# seq_along(...)[-1] is empty when there are no <schedule> children, whereas
# 2:length(...) would count down (2, 1) and fail with a subscript error.
# Rows are collected in a list and bound once instead of growing `result`
# with rbind() on every iteration.
rows <- lapply(seq_along(schedule_list)[-1], function(i) {
  score_node <- schedule_list[[i]][[1]]
  class_node <- score_node[[1]]
  cbind(
    teacher = teacher_node[[1]][[1]],
    t_id = attr(teacher_node, "id"),
    Date = attr(schedule_list[[i]], "date"),
    class = class_node[[1]][[1]],
    c_id = attr(class_node, "id"),
    c_status = attr(class_node, "status"),
    score = attr(score_node, "id"),
    People = class_node[[2]][[1]],
    department = class_node[[3]][[1]][[1]],
    d_id = attr(class_node[[3]], "id")
  )
})

# With no rows, do.call(rbind, list()) is NULL and this yields an empty
# data frame, matching the original `data.frame()` starting value.
result <- as.data.frame(do.call(rbind, rows))
来自网站的S1 xml格式:
# Nested-list (dput-style) representation of the parsed response for id "S1".
# Under scheduleList the first element is `teacher`, followed by one
# `schedule` element per date; values such as id, status, date, from and to
# are stored as R attributes on the corresponding list nodes.
structure(list(
scheduleList = structure(list(
teacher = structure(list(name = list("Mary")), .Names = "name", id = "S1"),
schedule = structure(list(
score = structure(list(
class = structure(list(name = list("312c"), people = list("129"),
department = structure(list(name = list("English")), .Names = "name", id = "302f")),
.Names = c("name", "people", "department"), id = "312", status = "-4")),
.Names = "class", id = "1")),
.Names = "score", date = "2017-01-18"),
schedule = structure(list(
score = structure(list(
class = structure(list(name = list("316c"), people = list("87"),
department = structure(list(name = list("English")), .Names = "name", id = "302f")),
.Names = c("name", "people", "department"), id = "316", status = "-2")),
.Names = "class", id = "2")),
.Names = "score", date = "2017-01-30")),
.Names = c("teacher", "schedule", "schedule"), from = "2017-01-17", to = "2017-02-03")),
.Names = "scheduleList")
这是我试图做的循环:
<!-- Sample response for teacher id S1: <scheduleList> holds one <teacher>
     element followed by one <schedule> element per date. -->
<result status="success">
<code>1</code>
<note>success</note>
<scheduleList from="2017-01-17" to="2017-02-03">
<teacher id="S1">
<name>Mary</name>
</teacher>
<schedule date="2017-01-18">
<score id="1">
<class id="312" status="-4">
<name>312C</name>
<people>129</people>
<department id="302f">
<name>English</name>
</department>
</class>
</score>
</schedule>
<schedule date="2017-01-30">
<score id="2">
<class id="316" status="-2">
<name>316c</name>
<people>87</people>
<department id="302f">
<name>English</name>
</department>
</class>
</score>
</schedule>
</scheduleList>
</result>
当我以低效的方式(分别手动)做了两次时,它完美地工作了。这是我试图做的循环:
# Attempted loop over two ids. As written it stops with the error quoted
# right after this snippet: `filled_form` is assigned by index without being
# created first, and `s` is likewise never created in this snippet.
url <- "XXXXXXXXXXX"
session <-html_session(url)
form <-html_form(read_html(url))[[1]]
for (i in 1:2){
# The id vector is rebuilt on every iteration although it never changes.
d=c("S1","S2")
# Indexed assignment into an object that does not exist yet -> error.
filled_form[i] <- set_values(form,
"id" = d[i],
"start" = "2017-01-17",
"end" = "2017-02-03",
"Password" = "lll")
# NOTE(review): `filled_form[i]` is a one-element subset, not the element
# itself; `[[i]]` is presumably what submit_form() needs - confirm.
s[i] <- submit_form(session,filled_form[i])
}
Error in filled_form[i] <- set_values(form, id = d[i], :
object 'filled_form' not found
答案 0 :(得分:2)
在向 `filled_form` 和 `s` 中存入值之前,您需要先在代码中创建这两个对象。您在原始代码中创建了它们,但在循环版本中没有。
# Submit the same form once per id and keep every filled form and response.
url <- "XXXXXXXXXXX"
session <- html_session(url)
form <- html_form(read_html(url))[[1]]

# Ids to request; defined once because the vector does not change per pass.
d <- c("S1", "S2")

# Create the containers before the loop so indexed assignment has a target;
# one slot per id avoids growing the lists on each iteration.
filled_form <- vector("list", length(d))
s <- vector("list", length(d))

for (i in seq_along(d)) {
  filled_form[[i]] <- set_values(
    form,
    "id" = d[i],
    "start" = "2017-01-17",
    "end" = "2017-02-03",
    "Password" = "lll"
  )
  # `[[i]]` extracts the form object itself; `[i]` would hand submit_form()
  # a length-one list wrapping the form instead of the form.
  s[[i]] <- submit_form(session, filled_form[[i]])
}