我正在使用casper JS并尝试废弃网站信息。我已经编写了代码并且工作完美,从某种意义上说,我能够从该网站获取所有必需的信息。实际上,它是一家公司的职业寻呼机,我正在从他们那里取消工作清单。当我通过casper.thenOpen(url,{method:"POST",data:somedata},function(){ })
将数据发送到mysql数据库时,问题就出现了。这也很完美。例如,使用此方法,我可以插入如下的静态数据:
$jobs_ins = "INSERT INTO company_jobs VALUES (NULL,'inc','job_title','job_link','job_loc','job_date','refNo','jType','jField','jLevel','jTime',10)";
if ($con - > query($jobs_ins)) {
echo "Inserted Row";
} else {
echo "Not Inserted";
}
`
但是,当我发送抓取数据时,没有插入任何数据。我不知道是什么问题,但当我console.log()
数据时,每个数据都显示完美。
下面是我的casper JS脚本:
var casper = require("casper").create({
verbose: true,
logLevel: "error",
clientScripts: ['plugins/jquery.js']
});
var X = require("casper").selectXPath,
P = 1,
MAINURL = "https://www.tempozeitarbeit.de/tempozeitarbeit/fuer-arbeitnehmer/stellenangebote-ulm-heidenheim/?page=" + P,
PAGES,
B = [],
D = [],
DETAILSURL = "https://www.tempozeitarbeit.de/contenido/includes/jobportal/jobs.php?id=";
casper.start(MAINURL);
casper.then(function() {
PAGES = casper.evaluate(function() {
return document.getElementsByClassName("jobpager")[0].children.length;
})
//Send information of the Company Start
var company_name = "TEMPOZEITARBEIT";
var company_loc = "GERMANY";
var company_logo = "https://www.tempozeitarbeit.de/cms/images/tempo_logo.png";
var company_link = "https://www.tempozeitarbeit.de/cms/";
casper.thenOpen("http://localhost/fiverr/Crawl%20The%20Jobs%20-%20Guisipe/modal_scripts.php", {
method: "POST",
data: "add_companies=true&company_name=" + company_name + "&company_loc=" + company_loc + "&company_logo=" + company_logo + "&company_link=" + company_link + "&ref_id=6",
})
//Send information of the Company End
nextPage(MAINURL);
})
function nextPage(_URL_) {
if (P == PAGES) {
console.log("S C R A P P I N G F I N I S H E D !");
return;
} else {
//BRIEF START-----------------------------------
casper.thenOpen(_URL_);
casper.then(function() {
console.log("\n\n----------------------------------------------------------------------------------------------------\n\n");
console.log("S C R A P P I N G P A G E # " + P);
B = this.evaluate(function() {
var d = [];
var el = document.getElementsByClassName("row job-table");
for (var i = 0; i < el.length; i++) {
d.push({
id: el[i].getAttribute("id"),
title: el[i].children[0].textContent,
loc: el[i].children[1].textContent,
type: el[i].children[2].textContent
})
}
return d;
})
for (var i = 0; i < B.length; i++) {
casper.thenOpen("http://localhost/fiverr/Crawl%20The%20Jobs%20-%20Guisipe/modal_scripts.php", {
method: "POST",
data: "add_jobs=true&job_title=" + B[i].title + "&refNo=_" + (i + 1) + "&job_date=none&updated_time=none&jType=" + B[i].type + "&jField=none&job_loc=" + B[i].loc + "&job_link=&jLevel=none&jTime=none",
})
getDetails(DETAILSURL + B[i].id, i)
}
P++;
nextPage("https://www.tempozeitarbeit.de/tempozeitarbeit/fuer-arbeitnehmer/stellenangebote-ulm-heidenheim/?page=" + P);
})
//BRIEF END -------------------------
}
}
//DETAILS START --------------------------------------
function getDetails(_DETAILSURL_, _I) {
casper.thenOpen(_DETAILSURL_);
casper.then(function() {
console.log("S C R A P P I N G J O B O F F E R # " + (_I + 1));
D = this.evaluate(function() {
var or = "https://www.tempozeitarbeit.de/cms/";
var els = document.body.children[1].children;
var dd = [],
final = [];
var apply = els[0].children[0].getAttribute("href");
for (var i = 0; i < els.length; i++) {
if (els[i].nodeName == "H3" && els[i + 1].nodeName == "UL") {
var temp = [];
for (var j = 0; j < els[i + 1].children.length; j++) {
temp.push(els[i + 1].children[j].textContent)
}
dd.push({
head: els[i].textContent,
desc: temp
});
} else if (els[i].nodeName == "H3" && els[i + 1].nodeName == "DIV") {
dd.push({
head: els[i].textContent,
desc: els[i + 1].textContent
});
}
}
final.push({
link: or + apply,
data: dd
});
return final;
})
// console.log("Link - "+D[0].link);
// for (var i = 0; i < D[0].data.length; i++) {
// console.log("Head - "+ D[0].data[i].head)
// console.log("Description - "+ D[0].data[i].desc)
// }
})
return D;
}
//DETAILS END --------------------------------------
casper.run();
这是我收到数据的PHP脚本:
if (isset($_POST["add_jobs"])) {
$job_title = $_POST["job_title"];
$job_link = $_POST["job_link"];
$job_loc = $_POST["job_loc"];
$job_date = $_POST["job_date"];
$refNo = $_POST["refNo"];
$jType = $_POST["jType"];
$jField = $_POST["jField"];
$jLevel = $_POST["jLevel"];
$jTime = $_POST["jTime"];
$inc = $_COOKIE["id"];
$jobs_ins = "INSERT INTO `company_jobs` VALUES (NULL,'$inc','$job_title','$job_link','$job_loc','$job_date','$refNo','$jType','$jField','$jLevel','$jTime',10)";
if ($con - > query($jobs_ins)) {
echo "Inserted Row";
} else {
echo "Not Inserted";
}
}