更新:我正在抓取的网站为我提供了获取所需数据的另一种方式,所以我不再遇到这个问题了。但是出于教育原因,我仍然对解决方案感兴趣。
我想使用cURL通过POST将数据传递给JSON服务器。
我不确定如何格式化cURL的变量。
这不起作用(返回null):
// Attempt 1: builds a form-style body with a single field named "json" whose
// value is the URL-encoded JSON object.
// NOTE(review): this is a form-encoded string, not a raw JSON document --
// presumably a mismatch if the request is sent as application/json; confirm
// against what the service expects.
define('POSTVARS', 'json='.urlencode('{"cid":"2623","strQuery":"","strValues":"undefined","currentPage":"0","pageSize":"-1","pageSort":"-1","countryId":"2","maxResultCount":""}'));
这个也不起作用:
// Attempt 2: the same "json" form field, percent-encoded by hand.
// The keys and values inside the encoded object are wrapped in single quotes,
// and the string ends with a stray "=" after the closing %7D.
define('POSTVARS', "json=%7B'cid'%3A'2623'%2C%20'strQuery'%3A''%2C%20'strValues'%3A'undefined'%2C%20'currentPage'%3A'0'%2C%20'pageSize'%3A'-1'%2C'pageSort'%3A'-1'%2C'countryId'%3A'2'%2C'maxResultCount'%3A''%7D=");
根据“Post JSON using Curl”这篇帖子的做法,我设置了如下请求头:
// Declares the request body as JSON via a Content-Type header.
// NOTE(review): the cURL option constant is spelled CURLOPT_HTTPHEADER (no
// trailing "S"); the name written below does not match it.
curl_setopt($ch, CURLOPT_HTTPHEADERS,array('Content-Type: application/json'));
我的脚本适用于常规POST变量,但是当我尝试将数据传递给JSON服务器时,我不知道我是否错误地格式化了数据或者必须提供其他一些额外的参数。
我的尝试的完整代码(格式在下面有点破碎,我在这里粘贴了完整的脚本https://docs.google.com/document/d/1hokE6-oMtcs3MBgPUzPwJvZIWNABc3XjOO2IzbpxDF4/edit?hl=en&authkey=CKXcnv8C):
/**
 * POST a fixed JSON query to $url with cURL and return the response.
 *
 * The request body is a JSON object (cid, strQuery, strValues, currentPage,
 * pageSize, pageSort, countryId, maxResultCount) sent with
 * "Content-Type: application/json; charset=utf-8".
 *
 * After the transfer the function additionally follows:
 *   - a final 301/302 status, by reading the Location header via get_headers();
 *   - a JavaScript "window.location" redirect found in the body, at most
 *     five times (tracked by $javascript_loop).
 *
 * @param string $url             Address to POST to.
 * @param int    $javascript_loop Number of JavaScript redirects already followed.
 * @param int    $timeout         Connect and total transfer timeout, in seconds.
 *
 * @return array array( 0 => response body, or false if the transfer failed,
 *                      1 => the curl_getinfo() array for the transfer ).
 */
function get_url( $url, $javascript_loop = 0, $timeout = 5 )
{
    $user_agent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1";

    // Scratch file for the cookie jar; it is removed again once the handle is closed.
    $cookie = tempnam( "/tmp", "CURLCOOKIE" );

    // The target is set through CURLOPT_URL below, so no URL (and no
    // externally defined constant) is needed at init time.
    $ch = curl_init();
    curl_setopt( $ch, CURLOPT_USERAGENT, $user_agent );
    curl_setopt( $ch, CURLOPT_URL, $url );
    if ( $cookie !== false )
    {
        curl_setopt( $ch, CURLOPT_COOKIEJAR, $cookie );
    }
    curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, true );
    curl_setopt( $ch, CURLOPT_ENCODING, "" );
    curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
    curl_setopt( $ch, CURLOPT_AUTOREFERER, true );
    // SECURITY NOTE(review): peer verification is switched off, so HTTPS
    // certificates are not checked. Kept as in the original; prefer enabling
    // verification with a proper CA bundle.
    curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, false );
    curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT, $timeout );
    curl_setopt( $ch, CURLOPT_TIMEOUT, $timeout );
    curl_setopt( $ch, CURLOPT_MAXREDIRS, 10 );

    // Build the body with json_encode() so it is guaranteed to be valid JSON
    // (double-quoted keys and strings) instead of a hand-written literal.
    $postvars = json_encode( array(
        'cid'            => '2623',
        'strQuery'       => '',
        'strValues'      => 'undefined',
        'currentPage'    => '0',
        'pageSize'       => '-1',
        'pageSort'       => '-1',
        'countryId'      => '2',
        'maxResultCount' => ''
    ) );

    curl_setopt( $ch, CURLOPT_POST, 1 );
    curl_setopt( $ch, CURLOPT_POSTFIELDS, $postvars );
    curl_setopt( $ch, CURLOPT_HEADER, 0 ); // do not include HTTP headers in the returned body
    curl_setopt( $ch, CURLOPT_HTTPHEADER, array( 'Content-Type: application/json; charset=utf-8' ) );

    $content  = curl_exec( $ch );
    $response = curl_getinfo( $ch );
    curl_close( $ch );

    // Drop the last reference to the handle before deleting the jar, so a
    // late cookie flush cannot recreate the file after it has been removed.
    unset( $ch );
    if ( $cookie !== false && is_file( $cookie ) )
    {
        unlink( $cookie );
    }

    // A final 301/302 means cURL did not end on a normal page; look up the
    // Location header ourselves and retry there.
    if ( $response['http_code'] == 301 || $response['http_code'] == 302 )
    {
        ini_set( "user_agent", $user_agent );
        if ( $headers = get_headers( $response['url'] ) )
        {
            foreach ( $headers as $value )
            {
                if ( substr( strtolower( $value ), 0, 9 ) == "location:" )
                {
                    // Carry the redirect counter and timeout through the retry.
                    return get_url( trim( substr( $value, 9 ) ), $javascript_loop, $timeout );
                }
            }
        }
    }

    // Only scan for JavaScript redirects when a body was actually received.
    $is_js_redirect = is_string( $content )
        && ( preg_match( "/>[[:space:]]+window\.location\.replace\('(.*)'\)/i", $content, $value )
            || preg_match( "/>[[:space:]]+window\.location\=\"(.*)\"/i", $content, $value ) );

    if ( $is_js_redirect && $javascript_loop < 5 )
    {
        return get_url( $value[1], $javascript_loop + 1, $timeout );
    }

    return array( $content, $response );
}
// Endpoint of the web service to query.
$url = 'http://us.asos.com/services/srvWebCategory.asmx/GetWebCategories';

// Run the request and dump the array( body, transfer info ) result for inspection.
$output = get_url( $url );
print_r( $output );
答案 0 :(得分:0)
请尝试使用CURLOPT_HTTPHEADER,而不是CURLOPT_HTTPHEADERS。后一种错误拼写是我从another StackOverflow thread里照搬来的,直到someone helped me才弄明白。