我遇到了一个无法理解的奇怪行为,有人能解释一下吗?
我正在尝试使用 Nightmare.js 从网页上抓取一些信息。从 DOM 中提取数据时,我会先做一些检查,以确保页面上存在特定的元素,例如:
项目代码(使用的是第三种写法,即三元表达式):
var data = yield nightmare.evaluate(() => {
var list = document.querySelector('#offers_container');
var sorted = list.getElementsByClassName('col-third');
var arr = [];
//var obj = {};
for (var i = 0; i < sorted.length; i++) {
var text = ''; // working
var price_new = '';
var price_old = '';
var image_link = '';
var discount_percent = '';
var extras = [];
// Grabing elements
/* SOMETIMES I GOT ERROR Cannot read property 'innerHTML' of null
if(sorted[i].querySelector('.title')){
text = sorted[i].querySelector('.title').innerHTML;
} else {
text = '';
}
*/
// THIRD SNIPPET EXAMPLE WORKING
text = sorted[i].querySelector('.title')? sorted[i].querySelector('.title').innerHTML : '';
price_new = sorted[i].querySelector('div.discount.price > div.t1 > span.value')? sorted[i].querySelector('div.discount.price > div.t1 > span.value').innerHTML + '.' + sorted[i].querySelector('div.discount.price > div.t1 > span.cents').innerHTML : '';
price_old = sorted[i].querySelector('div.discount.price > div.t2 > span.value')? sorted[i].querySelector('div.discount.price > div.t2 > span.value').innerHTML : '';
image_link = sorted[i].querySelector('div.img > img')? sorted[i].querySelector('div.img > img').getAttribute('src') : '';
discount_percent = sorted[i].querySelector('div.discount.percents > span.value')? sorted[i].querySelector('div.discount.percents > span.value').innerHTML : '';
// Check's extras
var check_extras = sorted[i].querySelector('div.tags.tags_primary');
if(check_extras){
var divs = check_extras.getElementsByTagName('div');
for(var e = 0; e < divs.length; e++){
var data_alt = divs[e].getAttribute('data-alt');
extras.push(data_alt);
}
}
arr.push(
{
text: text,
price_new: price_new,
price_old: price_old,
discount_percent: discount_percent,
image_link: image_link,
extras: extras
});
}
return arr;
});
不工作:有时条件判断通过了(我认为是因为 querySelector 返回了对象,所以条件成立),但随后读取 innerHTML 时却抛出错误,提示该元素不存在。
// Look the element up once and read its markup only when the lookup succeeded.
var matched = document.querySelector('some selector');
if (matched) {
  var element_data = matched.innerHTML;
}
所以这段代码有时能工作,有时不行。我读到 DOM 查询返回的是 Object,并且在其他例子中看到有人是这样写的:
不工作
// Same existence test as before, inverted so the success path comes first:
// the element must be neither undefined nor falsy before innerHTML is read.
var target = document.querySelector('some selector');
if (typeof target !== 'undefined' && target) {
  var element_data = target.innerHTML;
} else {
  console.log('No element');
}
这段代码也没有按预期工作。
但是当我使用下面这行代码时,它运行得很好(可以工作):
// Ternary form: take the element's markup when it exists, else a placeholder.
var found = document.querySelector('some selector');
var element_data = found ? found.innerHTML : 'no data';
既然 if 语句检查的条件是相同的,我想了解这背后的原因。
如果我想用旧的方式(if/else)来编写,代码应该怎么写?
完整的可运行代码(供检查):
var Nightmare = require('nightmare');
var vo = require('vo');
// Runs the `run` generator through vo and prints the scraped offers.
vo(run)(function (err, res) {
  if (err) {
    // Stop here: reading res.data after a failure would throw a TypeError
    // if no result was passed, hiding the original error.
    console.error(err);
    return;
  }
  console.dir(res.data);
});
/**
 * Opens https://www.maxima.lt/akcijos in Nightmare, keeps clicking the
 * button under `div.wrapper_buttons.centered` until it is no longer visible,
 * then extracts every `.col-third` card inside `#offers_container`.
 *
 * Written as a generator so a runner such as vo can resolve each yielded
 * Nightmare chain.
 *
 * @returns {{data: Array<{text: string, price_new: string, price_old: string,
 *   discount_percent: string, image_link: (string|null), extras: Array}>}}
 *   One entry per card; fields are '' (extras is []) when the matching
 *   element is absent from the card.
 */
function* run() {
  const nightmare = Nightmare({ show: true });
  const loadMoreSelector = 'body > div.wrapper.over.offers.cl > div.wrapper_buttons.centered > a';
  const settleDelayMs = 4500;

  yield nightmare.goto('https://www.maxima.lt/akcijos').wait('body');

  // Click first, then re-check: the loop ends once the button stops being visible.
  let buttonVisible = true;
  while (buttonVisible) {
    console.log('Click');
    yield nightmare.click(loadMoreSelector);
    console.log('waiting 4.5 s');
    yield nightmare.wait(settleDelayMs);
    buttonVisible = yield nightmare.visible(loadMoreSelector);
    console.log(`Is button visible? ${buttonVisible}`);
  }

  // Extra pause after the last click before reading the DOM.
  yield nightmare.wait(settleDelayMs);

  // Everything the callback needs is defined inside it; it reads only `document`.
  const data = yield nightmare.evaluate(() => {
    const container = document.querySelector('#offers_container');
    if (!container) {
      // No offers container on the page: return an empty result instead of
      // throwing on a null dereference.
      return [];
    }

    // Query once, then test the result, so a missing element can never be
    // dereferenced.
    const htmlOf = (node) => (node ? node.innerHTML : '');

    return Array.from(container.getElementsByClassName('col-third'), (card) => {
      const valueNode = card.querySelector('div.discount.price > div.t1 > span.value');
      const centsNode = card.querySelector('div.discount.price > div.t1 > span.cents');
      let priceNew = '';
      if (valueNode) {
        // span.cents used to be read without a check, which threw
        // "Cannot read property 'innerHTML' of null" on cards without cents.
        priceNew = centsNode
          ? `${valueNode.innerHTML}.${centsNode.innerHTML}`
          : valueNode.innerHTML;
      }

      const image = card.querySelector('div.img > img');
      const tags = card.querySelector('div.tags.tags_primary');

      return {
        text: htmlOf(card.querySelector('.title')),
        price_new: priceNew,
        price_old: htmlOf(card.querySelector('div.discount.price > div.t2 > span.value')),
        discount_percent: htmlOf(card.querySelector('div.discount.percents > span.value')),
        // Raw src attribute, matching what the previous code ultimately
        // returned (its host-prefixed value was overwritten before use).
        image_link: image ? image.getAttribute('src') : '',
        // One entry per <div> under the tag container: its data-alt attribute.
        extras: tags
          ? Array.from(tags.getElementsByTagName('div'), (div) => div.getAttribute('data-alt'))
          : [],
      };
    });
  });

  // NOTE(review): nightmare.end() is not called (the call was commented out),
  // so the browser instance stays open after scraping. Confirm this is intended.
  return { data };
}