使用 CasperJS 运行下面这段代码时:
// Reproduction script: open a plain-text URL with CasperJS and print what
// the loaded page's DOM reports as its content.
var casper = require('casper').create();
var url = 'https://www.youtube.com/robots.txt';

/**
 * Step callback: fetches the page's `document` through evaluate() and echoes
 * the innerHTML of the first entry in `document.all`.
 * For the plain-text URL above, the observed output is the file's text wrapped
 * in generated <head>/<body>/<pre> markup rather than the raw file.
 */
function printFirstElementMarkup() {
    var pageDocument = this.evaluate(function () {
        return document;
    });
    var rootElement = pageDocument.all[0];
    this.echo(rootElement.innerHTML);
}

casper.start(url, printFirstElementMarkup);
casper.run();
而不是得到这个:
# robots.txt file for YouTube
# Created in the distant future (the year 2000) after
# the robotic uprising of the mid 90's which wiped out all humans.
User-agent: Mediapartners-Google*
Disallow:
User-agent: *
Disallow: /bulletin
Disallow: /comment
Disallow: /forgot
Disallow: /get_video
Disallow: /get_video_info
Disallow: /login
Disallow: /results
Disallow: /signup
Disallow: /t/terms
Disallow: /t/privacy
Disallow: /verify_age
Disallow: /videos
Disallow: /watch_ajax
Disallow: /watch_popup
Disallow: /watch_queue_ajax
我得到了这个结果:
<head></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;"># robots.txt file for YouTube
# Created in the distant future (the year 2000) after
# the robotic uprising of the mid 90's which wiped out all humans.
User-agent: Mediapartners-Google*
Disallow:
User-agent: *
Disallow: /bulletin
Disallow: /comment
Disallow: /forgot
Disallow: /get_video
Disallow: /get_video_info
Disallow: /login
Disallow: /results
Disallow: /signup
Disallow: /t/terms
Disallow: /t/privacy
Disallow: /verify_age
Disallow: /videos
Disallow: /watch_ajax
Disallow: /watch_popup
Disallow: /watch_queue_ajax
</pre></body>
看起来 CasperJS 自动添加了 HTML 标签。如何原样获取纯文本文件的内容(即与源文件完全一致)?
答案 0 :(得分:0)
试试 `download` 函数怎么样?
脚本改为:
// Saves the remote resource to a local file instead of reading it back
// through the page DOM.
var casper = require('casper').create();
var url = 'https://www.youtube.com/robots.txt';

/**
 * Step callback: downloads `url` and writes it to 'robots.txt'.
 * NOTE(review): the relative path is presumably resolved against the
 * directory the script is launched from — confirm.
 */
function saveRemoteFile() {
    var targetPath = 'robots.txt';
    this.download(url, targetPath);
}

casper.start(url, saveRemoteFile);
casper.run();
**更新**
如果要将远程文件内容存储到字符串中,请使用 `base64encode` 获取其 Base64 编码,再用 `atob` 解码:
// Reads the remote resource into a string instead of saving it to disk.
var casper = require('casper').create();
var url = 'https://www.youtube.com/robots.txt';

// Holds the decoded file body once the step below has run.
var contents;

/**
 * Step callback: gets `url` as a Base64 string, decodes it with atob(),
 * stores the result in `contents`, and logs it.
 * NOTE(review): atob() yields one character per decoded byte, so non-ASCII
 * (e.g. multi-byte UTF-8) content would presumably need an extra decoding
 * step — confirm before reusing this for other files.
 */
function readRemoteText() {
    var encodedBody = this.base64encode(url);
    contents = atob(encodedBody);
    console.log(contents);
}

casper.start(url, readRemoteText);
casper.run();