如何使用phantomjs从站点下载图像

时间:2013-05-23 14:33:56

标签: coffeescript phantomjs

我想从网站保存一些图片。目前我可以获得图像的路径,但我不知道如何使用 PhantomJS 获取和保存图像。

# Collect the src URL of every image matching '.rotate img' on the loaded
# page and log each URL.
findRotationTeaserImages = ->
  # The inner function runs in the page context and uses the page's jQuery.
  paths = page.evaluate ->
    jQuery('.rotate img').map(-> return this.src).get()

  for path, i in paths
    console.log(path)
    # save the image

5 个答案:

答案 0 :(得分:17)

我知道这是一个老问题,但你只需将每个图像的尺寸和位置存储在一个对象中,然后修改 phantomjs 的 page.clipRect,以便 page.render() 方法只渲染图像所在的区域。这是一个示例,从http://dribbble.com/抓取多个图像:

var page = require('webpage').create();

// Open the page, measure every matching image inside the page context, then
// render each image to its own PNG by pointing page.clipRect at its rectangle.
page.open('http://dribbble.com/', function(status) {

    // Nothing can be measured or rendered if the page failed to load.
    if (status !== 'success') {
        console.log('Unable to load http://dribbble.com/');
        phantom.exit(1);
        return;
    }

    // Inject jQuery so the page-context code below can use $.
    page.includeJs('//ajax.googleapis.com/ajax/libs/jquery/1.10.2/jquery.min.js',function() {

        // Runs inside the page: returns one {top, left, width, height}
        // object per matching image.
        var images = page.evaluate(function() {
            var images = [];
            function getImgDimensions($i) {
                // Compute the offset once instead of once per coordinate.
                var offset = $i.offset();
                return {
                    top : offset.top,
                    left : offset.left,
                    width : $i.width(),
                    height : $i.height()
                };
            }
            $('.dribbble-img img').each(function() {
                images.push(getImgDimensions($(this)));
            });

            return images;
        });

        // Each rectangle has the shape page.clipRect expects, so it can be
        // assigned directly before rendering.
        images.forEach(function(imageObj, index){
            page.clipRect = imageObj;
            page.render('images/'+index+'.png');
        });

        phantom.exit();
    });
});

答案 1 :(得分:9)

现在有另一种方法可以做到这一点。

var fs = require("fs");
// Selector of the image to save; adjust it to the target page.
var imageSelector = ".rotate img";
// NOTE(review): `page` is assumed to be an already loaded webpage object
// created elsewhere - confirm against the surrounding script.
var imageBase64 = page.evaluate(function(selector){
  // This function runs inside the page, so the image element has to be
  // looked up here; variables from the outer script are not visible.
  var img = document.querySelector(selector);
  if (!img) {
    return null;
  }
  var canvas = document.createElement("canvas");
  canvas.width = img.width;
  canvas.height = img.height;
  var ctx = canvas.getContext("2d");
  ctx.drawImage(img, 0, 0);
  // Strip the "data:image/png;base64," prefix and keep only the payload.
  // NOTE(review): toDataURL presumably fails for cross-origin images unless
  // web security is disabled - verify for the target site.
  return canvas.toDataURL("image/png").split(",")[1];
}, imageSelector);
// Only write the file when an image was actually found and encoded.
if (imageBase64) {
  fs.write("file.png", atob(imageBase64), 'wb');
}

答案 2 :(得分:5)

通过启动一个子进程来运行下载图像的 Node 脚本,解决了此问题:

phantomJs脚本:

# Collect the teaser image URLs from the page and pass them to a Node script
# (loadRotationTeaser.js) that is started as a child process.
findRotationTeaserImages = ->
  paths = page.evaluate ->
    jQuery('.rotate img').map(-> return this.src).get()

  # Build the argument list directly. Joining everything into one string and
  # splitting it on spaces would add an empty argument when there are no
  # paths and would break up any path that contains a space.
  args = ['loadRotationTeaser.js'].concat(paths)

  child_process.execFile("node", args, null, (err, stdout, stderr) ->
    # Report a failed child process instead of dropping the error silently.
    console.log(err) if err
    phantom.exit()
  )

nodeJs脚本

http = require('http-get');

# Everything after "node <script>" is an image URL to download.
# slice (unlike splice) leaves process.argv itself untouched.
args = process.argv.slice(2)

# Save each URL under an index-based file name.
for path, i in args
  http.get path, 'public/images/rotationTeaser/img' + i + '.jpeg', (error, result) ->
    # Report failed downloads instead of ignoring them silently.
    console.error(error) if error

答案 3 :(得分:0)

如果图像尺寸已知:



    var webPage = require('webpage');

    /**
     * Download an image with known dimensions by opening it in its own page
     * and rendering the clipped area to a file.
     * @param src   Image source
     * @param dest  Destination full path
     * @param width Image width
     * @param height    Image height
     * @param timeout   Resource loading timeout (ms)
     * @param cbk   Callback (optional), invoked as cbk(success, cbkParam)
     * @param cbkParam  Parameter to pass back to the callback (optional)
     */
    function downloadImg(src, dest, width, height, timeout, cbk, cbkParam) {
        var page = webPage.create();

        page.settings.resourceTimeout = timeout; //resources loading timeout(ms)
        page.settings.webSecurityEnabled = false; //Disable web security
        page.settings.XSSAuditingEnabled = false; //Disable XSS auditing

        page.open(src, function(status) {

            // missing images sometime receive text from server
            var success = status === 'success' && !page.plainText;

            try {
                if (success) {
                    // Render only the top-left width x height area.
                    page.clipRect = {
                        top: 0,
                        left: 0,
                        width: width,
                        height: height
                    };
                    page.render(dest);
                }

                cbk && cbk(success, cbkParam);
            } finally {
                // Release the page even when rendering or the callback throws.
                page.close();
            }
        });
    };


答案 4 :(得分:0)

使用render方法时,我遇到了很多麻烦。幸运的是,我终于想出了两个更好的解决方案。这是我在项目中使用的代码。第一种解决方案在更新cookie时遇到一些麻烦,因此在获取验证码图像时效果不佳。这两种方法都会导致新的http请求。但是经过一些修改,第二个可以省略这种请求。

第一个从phantomJs获取Cookie并使用request发出新的http请求。第二个使用base64传递图像。

 async download(download_url, stream) {
    logger.profile(`download(download_url='${download_url}')`);
    let orig_url = await this.page.property('url');
    download_url = url.resolve(orig_url, download_url);
    let cookies = await this.page.property('cookies');
    let jar = request.jar();
    for (let cookie of cookies) {
        if (cookie.name !== undefined) {
            cookie.key = cookie.name;
            delete cookie.name;
        }
        if (cookie.httponly !== undefined) {
            cookie.httpOnly = cookie.httponly;
            delete cookie.httponly;
        }
        if (cookie.expires !== undefined)
            cookie.expires = new Date(cookie.expires);
        jar.setCookie(new Cookie(cookie), download_url, {ignoreError: true});
    }
    let req = request({
        url: download_url,
        jar: jar,
        headers: {
            'User-Agent': this.user_agent,
            'Referer': orig_url
        }
    });
    await new Promise((resolve, reject) => {
        req.pipe(stream)
            .on('close', resolve)
            .on('error', reject);
    });
    // Due to this issue https://github.com/ariya/phantomjs/issues/13409, we cannot set cookies back
    // to browser. It is said to be redesigned, but till now (Mar 31 2017), no change has been made.
    /*await Promise.all([
        new Promise((resolve, reject) => {
            req.on('response', () => {
                jar._jar.store.getAllCookies((err, cookies) => {
                    if (err) {
                        reject(err);
                        return;
                    }
                    cookies = cookies.map(x => x.toJSON());
                    for (let cookie of cookies) {
                        if (cookie.key !== undefined) {
                            cookie.name = cookie.key;
                            delete cookie.key;
                        }
                        if (cookie.httpOnly !== undefined) {
                            cookie.httponly = cookie.httpOnly;
                            delete cookie.httpOnly;
                        }
                        if (cookie.expires instanceof Date) {
                            cookie.expires = cookie.expires.toGMTString();
                            cookie.expiry = cookie.expires.toTime();
                        }
                        else if (cookie.expires == Infinity)
                            delete cookie.expires;
                        delete cookie.lastAccessed;
                        delete cookie.creation;
                        delete cookie.hostOnly;
                    }
                    this.page.property('cookies', cookies).then(resolve).catch(reject);
                });
            }).on('error', reject);
        }),
        new Promise((resolve, reject) => {
            req.pipe(fs.createWriteStream(save_path))
                .on('close', resolve)
                .on('error', reject);
        })
    ]);*/
    logger.profile(`download(download_url='${download_url}')`);
}
async download_image(download_url, stream) {
    logger.profile(`download_image(download_url='${download_url}')`);
    await Promise.all([
        new Promise((resolve, reject) => {
            this.client.once('donwload image', data => {
                if (data.err)
                    reject(err);
                else
                    stream.write(Buffer.from(data.data, 'base64'), resolve);

            });
        }),
        this.page.evaluate(function (url) {
            var img = new Image(), callback = function (err, data) {
                callPhantom({
                    event: 'donwload image',
                    data: {
                        err: err && err.message,
                        data: data
                    }
                });
            };
            img.onload = function () {
                var canvas = document.createElement("canvas");
                canvas.width = img.width;
                canvas.height = img.height;
                canvas.getContext("2d").drawImage(img, 0, 0);
                callback(null, canvas.toDataURL("image/png").replace(/^data:image\/(png|jpg);base64,/, ""));
            };
            img.onerror = function () {
                callback(new Error('Failed to fetch image.'));
            };
            img.src = url;
        }, download_url)
    ]);
    logger.profile(`download_image(download_url='${download_url}')`);
}