我用 PhantomJS 写了一个脚本。它的作用是从特定页面中抓取一些元素,这一部分工作正常。
以下是代码:
// Page object and the state shared with the step runner further down.
var page = new WebPage();
var testindex = 0;          // index into `steps` of the next step to run
var loadInProgress = false; // toggled by the load handlers below
var fs = require('fs');
var i = 0;
var j = 0;
var k = 0;

// Forward every message received through onConsoleMessage to the console.
page.onConsoleMessage = function (message) {
  console.log(message);
};

// Track page loads so the step runner can wait while one is in flight.
page.onLoadStarted = function () {
  loadInProgress = true;
  console.log("load started");
};
page.onLoadFinished = function () {
  loadInProgress = false;
  console.log("load finished");
};

// Sets the User Agent
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.107 Safari/537.36';

// Enable/Disable Javascript
// page.settings.javascriptEnabled = false;
// Ordered list of step functions; the setInterval runner below calls them one at a time.
var steps = [
function() { //Load Page
// NOTE(review): injectJs is called right after page.open without waiting for the load,
// and is handed a remote URL - confirm against the PhantomJS docs whether includeJs was intended.
page.open("http://www.example.com/mobiles/");
page.injectJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js");
},
function() { //Fetch Products
// Every value the page sends through window.callPhantom is written to product-list.csv.
page.onCallback = function(result) {
var fs = require('fs');
fs.write('product-list.csv', result, 'w+');
};
// The function below is passed to page.evaluate; it walks the product boxes and
// emits each CSV fragment (name, image URL, spec list, price, newline) via window.callPhantom.
page.evaluate(function() {
var arr_mainList = new Array();
var arr_innerList = new Array();
try {
// Loop bound: number of <ul> elements inside the first .item_grid; each pass reads .lap_thu_box[i].
for (i = 0; i < (document.getElementsByClassName("item_grid")[0].getElementsByTagName("ul").length); i++) {
arr_mainList.push(document.getElementsByClassName("lap_thu_box")[i]);
// Product name: text of the first link inside the box's first <h3>.
window.callPhantom(arr_mainList[i].getElementsByTagName("h3")[0].getElementsByTagName("a")[0].textContent + ", ");
//window.callPhantom(arr_mainList[i].getElementsByTagName("h3")[0].getElementsByTagName("a")[0].href + ", ");
// Attempts to open the product's detail link and log its .item_desc text.
// Per the accompanying question text, PhantomJS stops when it reaches this window.open call.
// NOTE(review): getElementsByClassName is called on the window object returned by
// window.open, not on its document - presumably myWindow.document was meant; confirm.
var myWindow = window.open(arr_mainList[i].getElementsByTagName("h3")[0].getElementsByTagName("a")[0].href);
console.log(myWindow.getElementsByClassName("item_desc")[0].textContent);
myWindow.close();
// Logs the detail link only when its href is longer than 43 characters.
if (arr_mainList[i].getElementsByTagName("h3")[0].getElementsByTagName("a")[0].href.length > 43) {
var innerURL = arr_mainList[i].getElementsByTagName("h3")[0].getElementsByTagName("a")[0].href;
console.log(innerURL);
}
// Image URL is read from the data-original attribute of the box's first <img>.
window.callPhantom(arr_mainList[i].getElementsByTagName("img")[0].getAttribute("data-original") + ", ");
arr_innerList.push(arr_mainList[i]);
// Spec list: commas are stripped from each <li>; items are separated by " | " and the last one is followed by ", ".
for (j = 0; j < (document.getElementsByClassName("lap_thu_box")[i].getElementsByTagName("ul")[0].getElementsByTagName("li").length); j++) {
if ((j+1) < document.getElementsByClassName("lap_thu_box")[i].getElementsByTagName("ul")[0].getElementsByTagName("li").length) {
window.callPhantom(arr_innerList[i].getElementsByTagName("li")[j].textContent.replace(/,/g, "") + " | ");
}
else {
window.callPhantom(arr_innerList[i].getElementsByTagName("li")[j].textContent.replace(/,/g, "") + ", ");
}
};
//window.callPhantom(", ");
// Price (commas stripped) closes the row, followed by the row terminator.
window.callPhantom(arr_innerList[i].getElementsByClassName("cat_price")[0].textContent.replace(/,/g, ""));
window.callPhantom("\n");
};
// NOTE(review): this assignment sits inside the function given to page.evaluate;
// presumably it does not reach the outer loadInProgress flag - confirm.
loadInProgress = true;
console.log("Successful.");
}
catch(ex) {
console.log("Failed: " + ex);
}
});
}
];
// Step runner: every 5 seconds, start the next step unless a page load is in
// progress. Once no step is left, stop polling and exit PhantomJS shortly after.
// `interval` is declared with `var` so it is no longer an implicit global.
var interval = setInterval(function () {
  if (!loadInProgress && typeof steps[testindex] === "function") {
    console.log("step " + (testindex + 1));
    steps[testindex]();
    testindex++;
  }
  if (typeof steps[testindex] !== "function") {
    // No steps remain: stop ticking so only one exit timer is ever scheduled.
    clearInterval(interval);
    setTimeout(function () {
      // Debug aid: fs.write('product-list.html', page.content, 'w');
      console.log("test complete!");
      phantom.exit();
    }, 100);
  }
}, 5000);
现在运行该程序,我可以在 csv 文件中得到所有信息;但一旦执行到 window.open,PhantomJS 就会停止。我知道无法在 page.evaluate 中打开新页面,但我需要获取产品描述,并用它代替产品链接写入 csv 文件。我已经找了好几个小时,任何帮助都将不胜感激。注意:我的限制是必须使用 PhantomJS。
答案 0 :(得分:0)
我稍微修改了你的脚本,现在你可以实现想要的效果了。请注意,不要一次抓取太多条目,否则会遇到内存问题;因此,如果目标网站有分页,请另写一个函数分批处理。在这段代码中,我假设你需要的是每台设备的描述,不过你也可以用同样的方式访问其他元素。
注意:您可能知道,跨域策略不允许我们用 JavaScript/jQuery 访问 iframe 中的内容(否则那将是一个巨大的安全漏洞)。因此,在 cmd/终端中执行脚本时,必须加上 `--web-security=no` 标志,例如:`phantomjs --web-security=no script.js`。
// Page objects and the state shared with the step runner further down.
var page = new WebPage();
var innerPage = new WebPage(); // not referenced anywhere else in the visible script
var testindex = 0;             // index into `steps` of the next step to run
var loadInProgress = false;    // toggled by the load handlers below
var fs = require('fs');
var i = 0;
var j = 0;
var k = 0;

// Forward every message received through onConsoleMessage to the console.
page.onConsoleMessage = function (message) {
  console.log(message);
};

// Track page loads so the step runner can wait while one is in flight.
page.onLoadStarted = function () {
  loadInProgress = true;
  console.log("load started");
};
page.onLoadFinished = function () {
  loadInProgress = false;
  console.log("load finished");
};

// Sets the User Agent
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.107 Safari/537.36';

// Enable/Disable Javascript
//page.settings.javascriptEnabled = false;

//IMPORTANT FLAGS
//--web-security=yes/no
/**
 * Ordered scraping steps, run one at a time by the setInterval runner below.
 *
 * Step 1 loads the listing page, pulls in jQuery and appends one hidden-in-page
 *        <iframe> per product, pointing at that product's detail page.
 * Step 2 reads each product box plus the matching iframe's description and
 *        sends one complete CSV row per product to page.onCallback.
 *
 * The code stays on ES5 syntax (var / function expressions) because it runs in
 * PhantomJS. Reading the iframes' content requires the script to be started with
 * --web-security=no (see the flag note above).
 */
var steps = [
  function () { // Load page and attach one iframe per product
    page.open("http://www.example.com/mobiles-apple/", function (status) {
      if (status !== "success") {
        console.log("Failed to load listing page: " + status);
        return;
      }
      // includeJs is the call meant for remote script URLs and signals completion
      // through its callback; injectJs reads a local file, so the original call
      // with an http:// URL could not load jQuery.
      page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function () {
        page.evaluate(function () {
          try {
            $("#main1").append('<div id="inner-data_iframes"></div>');
            var boxes = document.getElementsByClassName("lap_thu_box");
            var productCount = document.getElementsByClassName("item_grid")[0].getElementsByTagName("ul").length;
            for (var i = 0; i < productCount; i++) {
              var detailUrl = boxes[i].getElementsByTagName("h3")[0].getElementsByTagName("a")[0].href;
              $("#inner-data_iframes").append('<iframe id="myIframe' + i + '" src="' + detailUrl + '"></iframe>');
              window.document.body.scrollTop = document.body.scrollHeight;
            }
            console.log("Mission Successful.");
          }
          catch (ex) {
            // Report the actual cause instead of discarding it.
            console.log("Failed to add iFrame: " + ex);
          }
        });
      });
    });
  },
  function () { // Fetch products and emit one CSV row per product
    page.onCallback = function (result) {
      var fs = require('fs');
      // Append mode: every row sent from the page is added to the file.
      fs.write('product-list.csv', result, 'a');
    };
    page.evaluate(function () {
      // Must be defined inside the evaluated function: it runs in the page context.
      // Strips the field separator and folds line breaks so one product stays on one row.
      function csvSafe(text) {
        return String(text).replace(/,/g, "").replace(/[\r\n]+/g, " ");
      }
      try {
        var boxes = document.getElementsByClassName("lap_thu_box");
        var productCount = document.getElementsByClassName("item_grid")[0].getElementsByTagName("ul").length;
        for (var i = 0; i < productCount; i++) {
          var box = boxes[i];
          var link = box.getElementsByTagName("h3")[0].getElementsByTagName("a")[0];

          // A detail iframe that is missing or has no .item_desc yields an empty
          // description instead of throwing and aborting the whole export.
          var desc = $("#myIframe" + i).contents().find(".item_desc").html() || "";

          var imageUrl = box.getElementsByTagName("img")[0].getAttribute("data-original") || "";

          // Same selection as before: as many <li> as the box's first <ul> holds,
          // taken from the box's <li> list.
          var specCount = box.getElementsByTagName("ul")[0].getElementsByTagName("li").length;
          var listItems = box.getElementsByTagName("li");
          var specs = [];
          for (var j = 0; j < specCount; j++) {
            specs.push(csvSafe(listItems[j].textContent));
          }

          var price = csvSafe(box.getElementsByClassName("cat_price")[0].textContent);

          // Send the row in one call so a failure part-way through a product
          // never leaves a partial row in the file. The spec column is always
          // present (possibly empty), keeping columns aligned across rows.
          var row = [csvSafe(link.textContent), csvSafe(desc), imageUrl, specs.join(" | "), price];
          window.callPhantom(row.join(", ") + "\n");
        }
        console.log("Successful.");
      }
      catch (ex) {
        console.log("Failed: " + ex);
      }
    });
  }
];
// Step runner: polls every 5 seconds and starts the next step when no page load
// is in progress. When the step list is exhausted it stops polling and exits
// PhantomJS after a short delay. Declared with `var` to avoid an implicit global.
var interval = setInterval(function () {
  if (!loadInProgress && typeof steps[testindex] === "function") {
    console.log("step " + (testindex + 1));
    steps[testindex]();
    testindex++;
  }
  if (typeof steps[testindex] !== "function") {
    // Nothing left to run: cancel the poll so the exit timer is scheduled once.
    clearInterval(interval);
    setTimeout(function () {
      // Debug aid: fs.write('product-list.html', page.content, 'w');
      console.log("test complete!");
      phantom.exit();
    }, 100);
  }
}, 5000);