用php-curl脚本保存图像——实际上是验证码图像

时间:2010-04-05 23:37:27

标签: php curl

我有一个名为Curl的cURL封装类。

我们假设我有这个代码:

// Target page and the form fields to submit to it.
$url = 'http://www.google.com';
$fields = array('q'=>'search term'); //maybe some other arguments. but let's keep it simple.
// Submit the form through the asker's own Curl wrapper; $page receives the response body.
$curl = new Curl();
$page = $curl->post($url,$fields);

$page中会包含一些图像,默认情况下curl不会加载它们。我需要知道如何在不再次使用curl的情况下保存其中的特定图像。也就是说,一旦执行了$page = $curl->post(..),我需要知道如何保存该图像,而不必再用一次$curl->post(_image_location_)来获取该文件。

需要这样做的原因是从表单中保存验证码图像。我需要访问表单并获取正在加载的特定图像。如果我尝试访问图像的URL,它将是一个不同的验证码图像。

2 个答案:

答案 0 :(得分:1)

你所描述的是不可能的。对于网页内的每个外部资源(即任何不属于HTML内容本身的内容,例如图像,脚本,样式表等),您必须单独请求检索它。这就是所有浏览器的运作方式。

许多验证码都以会话为基础。您对HTML页面的初始请求可能会创建会话cookie,该会话cookie将作为响应标头的一部分发回。浏览器随后请求图像时会带上此cookie。如果您只是对图像执行简单的curl请求,则不会发送该cookie,因此您将获得不同的图像。

您必须分析页面并确定正在进行的会话管理类型,并适当地修改您的Curl请求,但正如我所提到的,我怀疑它将基于cookie。您可以先从curl_setopt()的CURLOPT_COOKIEJAR选项入手。您也可以在Google上搜索到相当简单明了的示例。

答案 1 :(得分:1)

这是整个类,如果你有疑问,我可以进一步为你解释。

<?php

/**
 * Small wrapper around one cURL handle bound to a single URL.
 *
 * The constructor opens the handle and creates a uniquely named cookie file
 * in the current working directory. get() and post() reuse that handle and
 * cookie file, so cookies set by one request are sent with the next one.
 * Call cURLclose() when finished to release the handle and delete the
 * cookie file.
 *
 * Failures are reported through error(), which prints a message and stops
 * the script.
 *
 * @author NEO
 */
class cURL {

    /** @var array HTTP header lines sent with every get()/post() request. */
    public $headers = array();
    /** @var string User-Agent string sent with every get()/post() request. */
    public $user_agent;
    /** @var string Value passed to CURLOPT_ENCODING (e.g. 'gzip'). */
    public $compression;
    /** @var string Path of the cookie file used as both cookie source and cookie jar. */
    public $cookie_file;
    /** @var string Proxy passed to CURLOPT_PROXY; an empty string disables it. */
    public $proxy;
    /** @var mixed The cURL handle, or NULL once cURLclose() has run. */
    public $process;
    /** @var string URL the handle was opened for. */
    public $url;
    /** @var mixed Not used by this class; kept for callers that set it. */
    public $hash;
    /** @var mixed Not used by this class; kept for callers that set it. */
    public $content;
    /** @var bool Whether requests read and write the cookie file. */
    public $cookies = TRUE;

    /**
     * Opens a cURL handle for $url and prepares a fresh cookie file.
     *
     * @param string $url URL that get(), post() and grab_image() will request.
     */
    public function __construct($url) {
        $this->url = $url;
        $this->process = curl_init($this->url);
        // A unique name per instance keeps separate sessions from sharing cookies.
        $this->cURL(TRUE, uniqid('cookie_') . '.txt');
    }

    /**
     * Sets the default headers, user agent, compression, proxy and cookie mode.
     *
     * Safe to call more than once: default headers are only added when they
     * are not already present, and headers added by the caller are preserved.
     *
     * @param bool   $cookies     Whether to use a cookie file.
     * @param string $cookie      Cookie file path (created if missing).
     * @param string $compression Value for CURLOPT_ENCODING.
     * @param string $proxy       Proxy for CURLOPT_PROXY, '' for none.
     */
    public function cURL($cookies = TRUE, $cookie = 'cookie.txt', $compression = 'gzip', $proxy = '') {
        $defaults = array(
            'Accept: image/gif, image/x-bitmap, image/jpeg, image/pjpeg',
            'Connection: Keep-Alive',
            'Content-type: application/x-www-form-urlencoded;charset=UTF-8',
        );
        foreach ($defaults as $line) {
            if (!in_array($line, $this->headers, TRUE)) {
                $this->headers[] = $line;
            }
        }
        $this->user_agent = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.0.3705; .NET CLR 1.1.4322; Media Center PC 4.0)';
        $this->compression = $compression;
        $this->proxy = $proxy;
        $this->cookies = $cookies;
        if ($this->cookies == TRUE) {
            $this->cookie($cookie);
        }
    }

    /**
     * Selects the cookie file, creating an empty one when it does not exist.
     *
     * @param string $cookie_file Path of the cookie file.
     */
    public function cookie($cookie_file) {
        if (!file_exists($cookie_file)) {
            $handle = fopen($cookie_file, 'w');
            if ($handle === FALSE) {
                $this->error('The cookie file could not be opened. Make sure this directory has the correct permissions');
            }
            // Close the handle we actually opened; the file only needs to exist.
            fclose($handle);
        }
        $this->cookie_file = $cookie_file;
    }

    /**
     * Fetches the full source of the configured URL with a GET request.
     *
     * Response headers are not included in the result and redirects are not
     * followed.
     *
     * @return string|false Response body, or FALSE when the transfer failed.
     */
    public function get() {
        $this->applyCommonOptions(0, 90);
        // Reset the method explicitly: a previous post() leaves CURLOPT_POST
        // switched on for this handle.
        curl_setopt($this->process, CURLOPT_HTTPGET, 1);
        return curl_exec($this->process);
    }

    /**
     * Sends $data to the configured URL with a POST request.
     *
     * The result includes the response headers followed by the body.
     * Redirects are not followed.
     *
     * @param string|array $data URL-encoded body, or an array of form fields.
     * @return string|false Raw response, or FALSE when the transfer failed.
     */
    public function post($data) {
        $this->applyCommonOptions(1, 30);
        if (is_array($data)) {
            // cURL sends arrays as multipart/form-data, which contradicts the
            // urlencoded Content-type header this class always sends.
            $data = http_build_query($data);
        }
        curl_setopt($this->process, CURLOPT_POSTFIELDS, $data);
        curl_setopt($this->process, CURLOPT_POST, 1);
        return curl_exec($this->process);
    }

    /**
     * Prints an HTML error box and terminates the script.
     *
     * @param string $error Message to display (inserted into the HTML as-is).
     */
    public function error($error) {
        echo "<center><div style='width:500px;border: 3px solid #FFEEFF; padding: 3px; background-color: #FFDDFF;font-family: verdana; font-size: 10px'><b>cURL Error</b><br>$error</div></center>";
        die;
    }

    /**
     * Downloads the configured URL and stores it in the current directory.
     *
     * The file name is the last "/"-separated segment of the URL; an existing
     * file with that name is overwritten.
     *
     * @return string Name of the file that was written.
     */
    public function grab_image() {
        curl_setopt($this->process, CURLOPT_HEADER, 0);
        // RETURNTRANSFER already yields the raw bytes; the old
        // CURLOPT_BINARYTRANSFER flag has no effect and is not needed.
        curl_setopt($this->process, CURLOPT_RETURNTRANSFER, 1);
        $raw = curl_exec($this->process);
        if ($raw === FALSE) {
            $this->error('The image could not be downloaded: ' . htmlspecialchars(curl_error($this->process)));
        }
        $segments = explode("/", $this->url);
        $name = array_pop($segments);
        if ($name === '') {
            // Happens when the URL ends in "/"; there is nothing to name the file after.
            $this->error('The image URL does not end in a file name');
        }
        if (file_put_contents($name, $raw) === FALSE) {
            $this->error('The image could not be written to disk');
        }
        return $name;
    }

    /**
     * Releases the cURL handle and deletes the cookie file.
     *
     * The handle is released first because the cookie jar is written when the
     * handle is closed; deleting the file beforehand would let it reappear.
     */
    public function cURLclose() {
        if ($this->process !== NULL) {
            $this->release($this->process);
            // Dropping the reference frees the handle on PHP 8+, where
            // curl_close() no longer does anything.
            $this->process = NULL;
        }
        if ($this->cookie_file && file_exists($this->cookie_file)) {
            unlink($this->cookie_file);
        }
    }

    /**
     * Extracts the reCAPTCHA challenge from a page and saves its image.
     *
     * Looks for the captcha script URL in $source, downloads that script,
     * reads the challenge token from it, downloads the matching image and
     * writes it to a uniquely named .jpg file in the current directory.
     *
     * @param string $source HTML source of the page containing the captcha.
     * @return array ['name' => image file name, 'hash' => challenge token].
     */
    public function saveCaptcha($source) {
        if (!is_string($source) || !preg_match('/ipt" src="(h[^"]+)/', $source, $result)) {
            $this->error('No captcha script URL was found in the page source');
        }
        $script = $this->download($result[1]);

        // The script contains "challenge : '<token>'"; take the first quoted value after it.
        $afterKey = explode("challenge :", $script);
        $quoted = isset($afterKey[1]) ? explode("'", $afterKey[1]) : array();
        if (!isset($quoted[1]) || $quoted[1] === '') {
            $this->error('No captcha challenge was found in the captcha script');
        }
        $cont = $quoted[1];

        $img = 'http://www.google.com/recaptcha/api/image?c=' . $cont;
        $rawdata = $this->download($img);

        $picturename = uniqid('captcha_') . ".jpg";
        if (file_put_contents($picturename, $rawdata) === FALSE) {
            $this->error('The captcha image could not be written to disk');
        }
        // The challenge token must be stored (e.g. in a database) so the
        // answer can later be submitted for this exact image.
        return array('name' => $picturename, 'hash' => $cont);
    }

    /**
     * Applies the options shared by get() and post() to the handle.
     *
     * @param int $includeHeader 1 to include response headers in the result, 0 otherwise.
     * @param int $timeout       Maximum transfer time in seconds.
     */
    private function applyCommonOptions($includeHeader, $timeout) {
        curl_setopt($this->process, CURLOPT_HTTPHEADER, $this->headers);
        curl_setopt($this->process, CURLOPT_HEADER, $includeHeader);
        curl_setopt($this->process, CURLOPT_USERAGENT, $this->user_agent);
        if ($this->cookies == TRUE) {
            curl_setopt($this->process, CURLOPT_COOKIEFILE, $this->cookie_file);
            curl_setopt($this->process, CURLOPT_COOKIEJAR, $this->cookie_file);
        }
        curl_setopt($this->process, CURLOPT_ENCODING, $this->compression);
        curl_setopt($this->process, CURLOPT_TIMEOUT, $timeout);
        if ($this->proxy) {
            curl_setopt($this->process, CURLOPT_PROXY, $this->proxy);
        }
        curl_setopt($this->process, CURLOPT_RETURNTRANSFER, 1);
    }

    /**
     * Downloads $url with a separate, cookie-less handle.
     *
     * @param string $url URL to fetch.
     * @return string Response body.
     */
    private function download($url) {
        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        $body = curl_exec($ch);
        // Read the error text before the handle is released.
        $failure = ($body === FALSE) ? curl_error($ch) : '';
        $this->release($ch);
        if ($body === FALSE) {
            $this->error('The captcha resource could not be downloaded: ' . htmlspecialchars($failure));
        }
        return $body;
    }

    /**
     * Closes a cURL handle on PHP versions where that is still meaningful.
     *
     * @param mixed $ch cURL handle to close.
     */
    private function release($ch) {
        // curl_close() is a no-op since PHP 8.0; handles are freed when the
        // last reference to them goes away.
        if (PHP_VERSION_ID < 80000) {
            curl_close($ch);
        }
    }
}

?>

所以你只需调用cURL类:

include 'class-Curl.php';

// Request a new captcha backed by a new cookie file
$url = 'http://lok.myvnc.com/insertar-anuncio.html';
// Create the cURL object
$captcha = new cURL($url);
// Fetch the source of the page
$source = $captcha->get();
if ($source === FALSE) {
    // get() returns FALSE when the transfer fails; there is nothing to parse.
    die('The captcha page could not be downloaded');
}
// Parse the captcha JavaScript and download the image to disk
$captchaimg = $captcha->saveCaptcha($source);
// Store ID, picturename, picturehash and cookie in the database


var_dump($captchaimg);
// The browser needs a URL path, not the server's filesystem path
// (DOCUMENT_ROOT), so the image is referenced relative to the web root.
// Assumes this script is served from /sms/ under the document root.
?>
<IMG src="<?php echo htmlspecialchars('/sms/' . $captchaimg['name'], ENT_QUOTES, 'UTF-8'); ?>">