我正在尝试使用 PhantomJS 滚动浏览整个 myactivity.google.com 页面。如果您的 Google 帐户中有搜索记录,就会知道向下滚动到页面底部时,历史记录是动态加载的。我想尽可能地向下滚动,以便加载全部历史记录,然后抓取数据并将其保存为 CSV 格式。下面这段 PhantomJS 滚动代码(以 Twitter 页面为例)运行良好:
// PhantomJS script: opens a Twitter timeline, then every 3 seconds renders a
// screenshot, reads document.body.scrollHeight and moves the scroll position
// just past that value. Exits after five screenshots.
var webpage = require('webpage').create();
webpage.viewportSize = { width: 1280, height: 800 };
webpage.scrollPosition = { top: 0, left: 0 };

webpage.open('https://twitter.com/founddrama', function(status) {
  if (status === 'fail') {
    console.error('webpage did not open successfully');
    phantom.exit(1);
    // Return so the screenshot loop below is never scheduled for a page
    // that failed to load.
    return;
  }

  var i = 0,
      top,
      timer,
      // Runs inside the page: reports the current height of the document body.
      queryFn = function() {
        return document.body.scrollHeight;
      };

  timer = setInterval(function() {
    var filename = 'twitter-' + (++i) + '.png';
    console.log('Writing ' + filename + '...');
    webpage.render(filename);

    top = webpage.evaluate(queryFn);
    console.log('[' + i + '] top = ' + top);
    // Scroll one pixel beyond the measured height for the next iteration.
    webpage.scrollPosition = { top: top + 1, left: 0 };

    if (i >= 5) {
      // Stop the timer so no further tick is queued once exit is requested.
      clearInterval(timer);
      phantom.exit();
    }
  }, 3000);
});
终端输出:
Writing twitter-1.png...
[1] top = 5823
Writing twitter-2.png...
[2] top = 5604
Writing twitter-3.png...
[3] top = 10709
Writing twitter-4.png...
[4] top = 16047
Writing twitter-5.png...
[5] top = 21074
但是当我试着把它改写成适用于我自己场景(myactivity.google.com)的版本时:
// PhantomJS script: signs in to a Google account in two steps (e-mail, then
// password), saving a numbered "breakpoint" screenshot around every step, and
// then repeatedly renders + scrolls the page that the login redirects to.
console.log("It started here.");
var system = require('system');
var username = ''; // username for Google account goes here
var password = ''; // password goes here
var page = require('webpage').create();
page.viewportSize = { width: 1920, height: 1080 };
page.scrollPosition = { top: 0, left: 0 };

// console.log calls made inside page.evaluate run in the page's own context;
// without this handler they are never relayed to the PhantomJS console.
page.onConsoleMessage = function (msg) {
  console.log(msg);
};

var LOGIN_STEP_DELAY_MS = 8000; // fixed wait after each form submission
var SCROLL_INTERVAL_MS = 3000;  // pause between scroll/screenshot rounds

// Builds the output path of the n-th breakpoint screenshot.
function screenshotPath(n) {
  return '/Users/jMac-NEW/Documents/FILEMAKER OCLC/login_test15_brkpoint' + n + '.png';
}

// Sets the value of the input with the given id inside the page.
// `note`, when provided, is logged from inside the page after the value is set.
// Returns false (and warns) when the element is missing instead of throwing
// inside the page.
function fillField(fieldId, value, note) {
  var found = page.evaluate(function (id, text, message) {
    var field = document.getElementById(id);
    if (!field) {
      return false;
    }
    field.value = text;
    if (message) {
      console.log(message);
    }
    return true;
  }, fieldId, value, note);
  if (!found) {
    console.error('Field not found on page: ' + fieldId);
  }
  return found;
}

// Submits the login form inside the page.
// `note`, when provided, is logged from inside the page before submitting.
// Returns false (and warns) when the form is missing.
function submitLoginForm(note) {
  var found = page.evaluate(function (message) {
    var form = document.getElementById('gaia_loginform');
    if (message) {
      console.log(message);
    }
    if (!form) {
      return false;
    }
    form.submit();
    return true;
  }, note);
  if (!found) {
    console.error('Form not found on page: gaia_loginform');
  }
  return found;
}

// Renders a screenshot, measures the body height and scrolls just past it,
// once per interval, for breakpoints 6 through 10; then exits.
function startScrollLoop() {
  var i = 5,
      top,
      timer,
      // Runs inside the page: reports the current height of the document body.
      // NOTE(review): the reported runs print a constant 1080 here, which equals
      // the viewport height set above, so on this page the body may not be the
      // element that actually scrolls - confirm which element scrolls.
      queryFn = function() {
        return document.body.scrollHeight;
      };

  timer = setInterval(function() {
    var filename = screenshotPath(++i);
    console.log('Writing ' + filename + '...');
    page.render(filename);

    top = page.evaluate(queryFn);
    console.log('[' + i + '] top = ' + top);
    page.scrollPosition = { top: top + 1, left: 0 };

    if (i >= 10) {
      // Stop the timer so no further tick is queued once exit is requested.
      clearInterval(timer);
      phantom.exit();
    }
  }, SCROLL_INTERVAL_MS);
}

console.log("It is now here.");
page.open('https://accounts.google.com/Login?continue=https://myactivity.google.com/myactivity&hl=en', function (status) {
  if (status === 'fail') {
    console.error('login page did not open successfully');
    phantom.exit(1);
    // Return so none of the login steps run against a page that failed to load.
    return;
  }

  // Step 1: e-mail address.
  page.render(screenshotPath(1));
  fillField('Email', username);
  page.render(screenshotPath(2));
  submitLoginForm();

  setTimeout(function () {
    // Step 2: password, on the page reached after the first submission.
    page.render(screenshotPath(3));
    fillField('Passwd', password, "Check if can log in a page.evaluate");
    page.render(screenshotPath(4));
    submitLoginForm("Check if can log in a page.evaluate");

    setTimeout(function () {
      // Step 3: logged in (presumably on the activity page) - start scrolling.
      page.render(screenshotPath(5));
      startScrollLoop();
    }, LOGIN_STEP_DELAY_MS);
  }, LOGIN_STEP_DELAY_MS);
}); //
我遇到了问题。(请注意,在我的代码中,变量 `webpage` 被替换成了 `page`。)
我的输出:
Writing /Users/jMac-NEW/Documents/FILEMAKER OCLC/login_test15_brkpoint6.png...
[6] top = 1080
Writing /Users/jMac-NEW/Documents/FILEMAKER OCLC/login_test15_brkpoint7.png...
[7] top = 1080
Writing /Users/jMac-NEW/Documents/FILEMAKER OCLC/login_test15_brkpoint8.png...
[8] top = 1080
我在目录中只能找到 /Users/jMac-NEW/Documents/FILEMAKER OCLC/login_test15_brkpoint6.png 这一个文件。
为什么,以及如何解决这个问题?