我有以下代码:
// Create the CasperJS instance that drives the whole script.
var casper = require("casper").create({
// Uncomment the next two options to get verbose debug logging.
// verbose: true,
// logLevel: "debug",
// NOTE(review): CasperJS documents webSecurityEnabled as an entry of the
// `pageSettings` option; confirm that it has any effect at the top level.
webSecurityEnabled : false
});
// Script-wide accumulator for collected URLs; getLinksFromIframes() appends
// to it and reassigns it with a de-duplicated copy.
var links = [];
/**
 * Read the `href` property of every <a> element in the document that `obj`
 * is currently evaluating against.
 *
 * @param {Object} obj - Object exposing `evaluate(fn)`; the script passes a
 *     Casper instance.
 * @returns {Array} The value handed back by `obj.evaluate`: one href per
 *     anchor, in the order `querySelectorAll` reports them.
 */
function get_links(obj) {
    return obj.evaluate(function () {
        // This function runs in the page, so it may only rely on `document`.
        var anchors = document.querySelectorAll("a");
        return Array.prototype.map.call(anchors, function (anchor) {
            return anchor.href;
        });
    });
}
/**
 * Return the distinct HTTP(S) URLs contained in `arr`, in first-seen order.
 *
 * Entries that do not start with the `http:` or `https:` scheme are dropped.
 * The previous pattern was unanchored, so values such as
 * "mailto:http@example.com" slipped through.
 *
 * @param {Array|null|undefined} arr - Candidate URLs. A missing list yields
 *     an empty result instead of throwing.
 * @returns {Array} De-duplicated URLs.
 */
function unique(arr) {
    var seen = {};
    var i;
    var candidate;
    if (!arr) {
        return [];
    }
    for (i = 0; i < arr.length; i++) {
        candidate = arr[i];
        // Anchor on the scheme so "http" appearing later in the string does not count.
        if (/^https?:/.test(candidate)) {
            seen[candidate] = true;
        }
    }
    // Keys are URL strings (never integer-like), so insertion order is preserved.
    return Object.keys(seen);
}
/**
 * Recursively visit the iframes of the current page and add the links found
 * in each of them to the script-level `links` list.
 *
 * Must be invoked with a Casper-like object as `this` (it uses `echo`,
 * `getCurrentUrl`, `evaluate`, `withFrame` and `then`). Links of the
 * top-level document itself are not collected, only those inside frames.
 *
 * @param {Function} callback - Called with the same `this` from a `then`
 *     step registered after the frame visits have been set up.
 */
function getLinksFromIframes(callback) {
    this.echo("Here we come: " + this.getCurrentUrl() + "\n");

    // Returns [0, 1, ..., n - 1] for the n <iframe> elements of the document
    // that `frame` is currently evaluating against.
    function listFrameIndexes(frame) {
        return frame.evaluate(function () {
            var total = document.querySelectorAll("iframe").length;
            var indexes = [];
            var n;
            for (n = 0; n < total; n++) {
                indexes.push(n);
            }
            return indexes;
        });
    }

    // Enters every child frame of `frame`, records its links, then recurses.
    function descend(frame) {
        var indexes = listFrameIndexes(frame);
        var k;
        for (k = 0; k < indexes.length; k++) {
            frame.withFrame(indexes[k], function () {
                this.echo("We are here: " + this.getCurrentUrl());
                unique(get_links(this)).forEach(function (href) {
                    console.log(href);
                    links.push(href);
                });
                // Collapse duplicates gathered so far across all frames.
                links = unique(links);
                console.log("");
                descend(this);
            });
        }
    }

    descend(this);
    this.then(function () {
        callback.call(this);
    });
}
// Entry point: open the start page, collect the links found inside its
// iframes, then print the accumulated list once collection has finished.
casper.start("http://domu-test-2/node/1", function () {
    getLinksFromIframes.call(this, function () {
        console.log("Done!\n");
        links.forEach(function (link) {
            console.log(link);
        });
    });
}).then(function () {}).run();
现在的问题是:
如果我想按第一级 iframe 获取链接,应该如何重构 getLinksFromIframes() 函数?目前,所有 iframe 共享同一个“全局”变量 links。我认为 links 应该是一个由链接列表组成的列表:在 withFrame 回调中初始化一个新列表,然后把这个新列表的引用传递给子 iframe。那么我应该如何传递它,并“回溯”嵌套 iframe 中的所有链接?
答案 0 :(得分:1)
如果我没有理解错,你是想自己选择从哪个 iframe 获取链接,并且只从那一个 iframe 中获取。如果是这种情况,那么你可以直接使用 switchToChildFrame 切换到所需的 iframe,然后调用 get_links(obj) 来获取链接。
我的例子有 3 个页面:index.html 在一个 iframe 中加载 iframe1.html,而 iframe1.html 内部又有一个加载 iframe2.html 的 iframe。每个文件里面都有 3 个链接:
**index.html**
<a href="link1/from/index">Link 1 from index</a>
<a href="link2/from/index">Link 2 from index</a>
<a href="link3/from/index">Link 3 from index</a>
<iframe src="iframe1.html"></iframe>
**iframe1.html**
<a href="link1/from/iframe1">Link 1 from iframe 1</a>
<a href="link2/from/iframe1">Link 2 from iframe 1</a>
<a href="link3/from/iframe1">Link 3 from iframe 1</a>
<iframe src="iframe2.html"></iframe>
**iframe2.html**
<a href="link1/from/iframe2">Link 1 from iframe 2</a>
<a href="link2/from/iframe2">Link 2 from iframe 2</a>
<a href="link3/from/iframe2">Link 3 from iframe 2</a>
重构后的 getLinksFromIframes 函数如下:
**结果**
如果您同时评论function getLinksFromIframes(callback) {
// Variant that reads links from a single, manually selected frame instead of
// walking all frames. Expects a Casper-like object as `this`; `callback` is
// invoked with the same `this` from a `then` step registered at the end.
this.echo("Here we come: " + this.getCurrentUrl() + "\n");
// Logs the URL `obj` is currently on, prints each unique link found there,
// appends them to the script-level `links` list and de-duplicates that list.
function to_frame(obj) {
obj.echo("We are here: " + obj.getCurrentUrl());
var l = unique(get_links(obj));
var i;
for (i = 0; i < l.length; i++) {
console.log(l[i]);
links.push(l[i])
}
links = unique(links);
console.log("");
}
// Returns one index per <iframe> element of the document `obj` evaluates
// against. Nothing in this version of the function calls it.
function to_evaluate(obj) {
return obj.evaluate(function () {
var iframes = [];
[].forEach.call(document.querySelectorAll("iframe"), function (iframe, i) {
iframes.push(i);
});
return iframes;
})
}
// Frame selection: each active switchToChildFrame(0) call descends one level
// into the first child frame before the links are read.
// NOTE(review): no switch back to the parent/main frame is made in this
// function; confirm that later steps are meant to keep running in the child.
// Leave both switchToChildFrame as comments to get the "index.html" links
this.page.switchToChildFrame(0); // Uncomment to get the links of "iframe1.html"
//this.page.switchToChildFrame(0); // Uncomment to get the links of "iframe2.html"
to_frame(this);
// Registered as a step so the callback runs through Casper's step queue.
this.then(function () {
callback.call(this);
});
}
如果把两个 switchToChildFrame 调用都注释掉,则会获得 index.html 的链接:
casperjs caspers-read-iframes.js
Here we come: http://pjs.lytrax.net/node/1/
We are here: http://pjs.lytrax.net/node/1/
http://pjs.lytrax.net/node/1/link1/from/index
http://pjs.lytrax.net/node/1/link2/from/index
http://pjs.lytrax.net/node/1/link3/from/index
Done!
http://pjs.lytrax.net/node/1/link1/from/index
http://pjs.lytrax.net/node/1/link2/from/index
http://pjs.lytrax.net/node/1/link3/from/index
如果您取消注释第一个 switchToChildFrame,您将获得第一级 iframe1.html 的链接:
casperjs caspers-read-iframes.js
Here we come: http://pjs.lytrax.net/node/1/
We are here: http://pjs.lytrax.net/node/1/iframe1.html
http://pjs.lytrax.net/node/1/link1/from/iframe1
http://pjs.lytrax.net/node/1/link2/from/iframe1
http://pjs.lytrax.net/node/1/link3/from/iframe1
Done!
http://pjs.lytrax.net/node/1/link1/from/iframe1
http://pjs.lytrax.net/node/1/link2/from/iframe1
http://pjs.lytrax.net/node/1/link3/from/iframe1
如果您取消注释第一个和第二个 switchToChildFrame,您将获得第二级 iframe2.html 的链接: