我使用casperjs向某个网址发送POST请求,并使用fiddler2调试我的代码。
下面是我的代码(用coffeescript编写)。
# Options for the CasperJS instance: debug-level verbose logging and a
# fixed viewport size.
casperOptions =
  waitTimeout: 10000
  verbose: true
  logLevel: 'debug'
  userAgent: 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'
  ignoreSslErrors: true
  viewportSize:
    width: 1080
    height: 1024

casper = require('casper').create casperOptions

url = "http://www.sample.com/test"

# Form fields submitted with the POST request.
formFields =
  a: "aaa"
  b: "bbb"
  c: "ccc"

# Request headers sent along with the POST request.
requestHeaders =
  "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36777"
  "Content-Type": "application/x-www-form-urlencoded"
  "Accept": "text/plain, */*"

postSettings =
  method: "post"
  data: formFields
  headers: requestHeaders

casper.start()

# Open the URL with the POST settings, then print the resulting page content.
casper.thenOpen url, postSettings, ->
  console.log @getPageContent()

casper.run()
当我运行它时,我得到了以下调试信息:
C:\casperjs\batchbin\cj.bat C:\Users\***\WebstormProjects\haha\test.coffee
[info] [phantom] Starting...
[info] [phantom] Running suite: 2 steps
[debug] [phantom] opening url: http://www.sample.com/test, HTTP POST
[debug] [phantom] Navigation requested: url=http://www.sample.com/test, type=Other, willNavigate=true, isMainFrame=true
[debug] [phantom] url changed to "http://www.sample.com/test"
[debug] [phantom] Successfully injected Casper client-side utilities
[info] [phantom] Step anonymous 2/2 http://www.sample.com/test (HTTP 200)
<html xmlns="http://www.w3.org/1999/xhtml"><body><parsererror style="display: block; white-space: pre; border: 2px solid #c77; padding: 0 1em 0 1em; margin: 1em; background-color: #fdd; color: black"><h3>This page contains the following errors:</h3><div style="font-family:monospace;font-size:12px">error on line 1 at column 1: Start tag expected.</div><h3>Below is a rendering of the page up to the first error.</h3></parsererror></body></html>
[info] [phantom] Step anonymous 2/2: done in 657ms.
[info] [phantom] Done 2 steps in 675ms
Process finished with exit code 0
然而,在fiddler中看到的POST请求是:
POST http://www.sample.com/test HTTP/1.1
Origin: null
Content-Length: ***
Accept: text/plain, */*
Content-Type: application/x-www-form-urlencoded
User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36777
Cookie: ***
Connection: Keep-Alive
Accept-Encoding: gzip
Accept-Language: en-US,*
Host: www.sample.com
a=aaa&b=bbb&c=ccc
对应的POST响应是:
HTTP/1.1 200 OK
Cache-Control: private
Content-Type: text/xml; charset=utf-8
Vary: Accept-Encoding
Date: Mon, 08 Dec 2014 14:36:34 GMT
Content-Length: ***
http://www.sample.com/this_is_a_sample_url
请注意,响应正文就是网址 http://www.sample.com/this_is_a_sample_url。但getPageContent()
返回给我的却是一段HTML代码。起初我认为问题可能是由Accept
请求标头引起的。但是,它已经设置为text/plain
而不是HTML。
谁能给我一些建议?
答案 0 :(得分:1)
您的服务器响应可能包含正确的数据,但由于它以XML内容类型(text/xml)返回了一个URL,PhantomJS无法将其正确解析为XML。这就是为什么getPageContent()得到的是一个显示解析错误的HTML页面。
您应该改用__utils__.sendAJAX
来下载内容(使用JavaScript):
/**
 * Sends a synchronous POST request from inside the page context and returns
 * the response, forcing it to be treated as plain text.
 *
 * @param {string} url - Address the request is sent to.
 * @param {Object} data - Key/value pairs passed to `__utils__.sendAJAX` as
 *     the request data.
 * @returns {*} The value returned by `__utils__.sendAJAX`.
 */
casper.post = function(url, data){
    // The evaluated function runs in the page context and cannot see
    // variables of this scope, so `url` and `data` are both passed in as
    // explicit arguments instead of being captured by closure.
    return this.evaluate(function(targetURL, payload){
        // Fourth argument `false` makes the request synchronous, so the
        // response can be returned directly.
        return __utils__.sendAJAX(targetURL, "POST", payload, false, {
            overrideMimeType: "text/plain"
        });
    }, url, data);
};
// Address of the endpoint that receives the POST request. It must be
// declared in this scope; otherwise the step below throws a ReferenceError.
var targetURL = "http://www.sample.com/test";

// Open an existing dummy page first so that a page context exists, then
// issue the POST from inside it via casper.post. Because the dummy page's
// domain differs from the target's, CasperJS has to be run with
// --web-security=false.
casper.start("http://example.com").then(function(){
    var content = this.post(targetURL, {
        a : "aaa",
        b : "bbb",
        c : "ccc"
    });
    // do something with content
}).run();
在开始时,打开一个虚拟(现有)页面以正确初始化系统。当虚拟域与实际请求域不同时,您还需要使用--web-security=false
命令行选项运行CasperJS。
如果在start
上使用虚拟本地html文件,则应添加--local-to-remote-url-access=true
命令行选项,因为当前URL为about:blank,且AJAX请求仅限于当前域。