当前软件堆栈: Casperjs 1.1 beta, Phantomjs 1.9.7, Windows 8.1 64位, Visual Studio 2013
我一直在尝试使用casperjs来自动化一个简单的过程。我想导航到以下网址:
http://financials.morningstar.com/income-statement/is.html?t=TSLA&region=usa&culture=en-US
从那里开始,我想模拟点击表头中的 Export 按钮,并通过操作系统弹出的保存窗口下载 .csv 文件。
我原以为这会相对简单,是我太天真了。到目前为止,我已经能够导航到该页面,确认它是正确的页面,并使用 CSS 或 XPath 选择器找到正确的元素。问题是,当我使用 click 或 thenClick 方法时,脚本运行没有报错,但屏幕截图中看不到点击产生的任何效果(例如弹出窗口)。
接着我尝试用 mouseEvent 方法来模拟点击导出按钮,这次脚本同样可以运行,但输出中出现了 Phantom 的警告:Loading resource failed with status=fail (HTTP 200)。
另外值得注意的是,在上述页面中使用 Chrome 的"检查元素"(Inspect Element)查看该按钮,得到的内容是:
<a href="javascript:SRT_stocFund.Export()" class="rf_export"></a>
我继续深入研究页面文件并找到javascript导出函数的定义:
// Handler invoked by the page's Export link (href="javascript:SRT_stocFund.Export()").
// It opens no dialog of its own: it builds the CSV report URL and navigates
// the current document to it.
SRT_stocFund.Export = function () {
//return false;
// GetPara() is defined elsewhere on the page; its result is used as the query string.
var params = this.GetPara();
// hostPath, denominatorView and number are not defined in this snippet —
// presumably page-level variables; verify against the page source.
document.location = hostPath+"/ajax/ReportProcess4CSV.html?" + params+"&denominatorView="+denominatorView+"&number="+number;
};
请参阅下面的我当前的代码。如有必要,我可以提供cmd shell调试输出的一些摘录。
// CasperJS script: open a Morningstar financial-statement page, click the
// Export link in the table header and save the CSV that the link requests.
// Written in ES5 because it runs inside PhantomJS 1.9.x.
var casper = require('casper').create({
    pageSettings: {
        loadImages: false,
        // The PhantomJS setting is spelled "loadPlugins"; a lowercase
        // "loadplugins" key is silently ignored.
        loadPlugins: false,
        // The user agent is a page setting, so it belongs in pageSettings
        // rather than at the top level of the options object.
        userAgent: 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36'
    },
    verbose: true,
    logLevel: 'debug'
});
phantom.cookiesEnabled = true;
var utils = require('utils');
var fs = require('fs');
// Debug dump of the cookie jar.
// NOTE(review): no page has been loaded at this point, so the dump is
// presumably empty — move it into a later step if real cookies are needed.
var cookies = JSON.stringify(phantom.cookies);
// The third argument of fs.write is an open-mode string ('w' = overwrite),
// not a Unix permission number.
fs.write('cookies.txt', cookies, 'w');
var x = require('casper').selectXPath;

// Which of the 3 statements to open; select from:
// 'income-statement/is'
// 'balance-sheet/bs'
// 'cash-flow/cf'
var statement = 'income-statement/is';
var stock_here = 'TSLA';

// Morningstar URL. The statement value already ends with the page name, so
// only ".html" is appended (e.g. .../income-statement/is.html).
var url = 'http://financials.morningstar.com/' + statement + '.html?t=' + stock_here + '&region=usa&culture=en-US';

// Selector of the Export link, shared by every step below.
var exportSelector = '#sfcontent > div.rf_ctlwrap > div.rf_ctl2_opt > div.exportButton > span > a';

// PhantomJS is headless and cannot show an OS "save file" dialog. The Export
// link only navigates to a ".../ReportProcess4CSV.html?..." URL, so remember
// that URL when it is requested and fetch it explicitly afterwards.
var csvUrl = null;
casper.on('resource.requested', function (requestData) {
    if (requestData.url.indexOf('ReportProcess4CSV') !== -1) {
        csvUrl = requestData.url;
    }
});

// Open the page and report whether the Export link is visible.
casper.start(url, function () {
    this.echo(this.getTitle());
    console.log('site load...');
    if (this.visible(exportSelector)) {
        this.echo("This element selector expression is FOUND");
    } else {
        this.echo("This element selector expression is MISSING");
    }
}).viewport(1200, 1000);

// Click the Export link, then take the screenshot from inside the wait
// callback: wait() only schedules a step, so a capture() placed right after
// it in the same step would run before the delay has elapsed.
casper.waitForSelector(exportSelector, function () {
    this.mouseEvent('click', exportSelector);
    this.wait(1000, function () {
        this.capture('clicked.png');
    });
});

// Once the click has produced the CSV request, download it to <ticker>.csv.
// NOTE(review): download() fetches from the page context, so this assumes the
// CSV is served from the same origin as the page — confirm.
casper.waitFor(function () {
    return csvUrl !== null;
}, function () {
    this.echo('Downloading ' + csvUrl);
    this.download(csvUrl, stock_here + '.csv');
}, function () {
    this.echo('The Export click did not trigger a CSV request');
});

casper.run();
答案 0 :(得分:1)
你必须使用 CasperJS 吗?其实可以直接请求 CSV 的网址,下面的代码似乎能按预期工作:
import urllib2
import csv
import StringIO
url = "http://financials.morningstar.com/ajax/ReportProcess4CSV.html?&t=XNAS:{}®ion=usa&culture=en-US&cur=USD&reportType=is&period={}&dataType=A&order=asc&columnYear=5&rounding=3&view=raw&denominatorView=raw&number=3"
stock = "TSLA"
period = 12 #In months, 12 = annual, 3 = quarterly, etc.
request = urllib2.urlopen(url.format(stock, period))
filename = request.info()['Content-Disposition'][21:-1]
data = request.read()
f = StringIO.StringIO(data)
reader = csv.reader(f)
for row in reader:
print row
对于python 3.4:
# Python 3 script: fetch the Morningstar CSV export directly and print its rows.
import urllib.request
import io
import csv

# Direct URL of the CSV report; the two {} placeholders are filled with the
# ticker symbol and the period. Query parameters are separated with "&".
url = "http://financials.morningstar.com/ajax/ReportProcess4CSV.html?&t=XNAS:{}&region=usa&culture=en-US&cur=USD&reportType=is&period={}&dataType=A&order=asc&columnYear=5&rounding=3&view=raw&denominatorView=raw&number=3"
stock = "TSLA"
period = 12 #In months, 12 = annual, 3 = quarterly, etc.

# The context manager closes the connection when the block exits.
with urllib.request.urlopen(url.format(stock, period)) as request:
    # Presumably the header looks like 'attachment;filename="<name>"'; the
    # slice drops that 21-character prefix and the closing quote — verify
    # against the actual response.
    filename = request.info()['Content-Disposition'][21:-1]
    # read() returns bytes. str(bytes) would give the "b'...'" repr with
    # escaped newlines, so decode instead, using the charset announced by the
    # server and falling back to UTF-8 when none is given.
    charset = request.info().get_content_charset() or 'utf-8'
    data = request.read().decode(charset)

# newline='' leaves line endings untouched so the csv module can handle them.
f = io.StringIO(data, newline='')
reader = csv.reader(f)
for row in reader:
    print(row)