如何抓取谷歌网络图像

时间:2012-08-08 03:51:40

标签: image web-crawler

我想抓取谷歌图片搜索返回的结果。谷歌是否提供了相应的工具?我正在构建一个物体识别系统,需要各种主题的训练样本。

3 个答案:

答案 0 :(得分:2)

这可能对您有用,因为Google已弃用其搜索API:

  

Google自定义搜索可让您搜索单个网站或一组网站。借助Google的强大功能,您可以创建根据自身需求和兴趣量身定制的搜索引擎,并在您的网站上展示搜索结果。您的自定义搜索引擎可以根据您指定的网站,对搜索结果进行优先排序或加以限制。

https://developers.google.com/custom-search/

答案 1 :(得分:2)

您可以使用谷歌的图片搜索 API(Image Search API),例如:

// Query Google's image search JSON endpoint and decode the response.
// NOTE(review): this is the legacy AJAX search endpoint; confirm it is still
// served before relying on it.
$query = "stackoverflow";
$url = "https://ajax.googleapis.com/ajax/services/search/images?v=1.0&q=" . urlencode($query);

// The referer is set manually; replace the placeholder with your own site.
$referer = "https://www.example.com/"; // Enter the URL of your site here

// sendRequest
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_REFERER, $referer);
$body = curl_exec($ch);
if ($body === false) {
    // curl_exec() returns false on failure; read the error before closing the handle.
    $error = curl_error($ch);
    curl_close($ch);
    throw new RuntimeException("Image search request failed: " . $error);
}
curl_close($ch);

// now, process the JSON string
$json = json_decode($body);
if ($json === null) {
    // json_decode() returns null when the body is not valid JSON.
    throw new RuntimeException("Image search response was not valid JSON: " . json_last_error_msg());
}
// now have some fun with the results...

更多信息:https://developers.google.com/image-search/v1/jsondevguide#json_snippets_php

答案 2 :(得分:0)

package GoogleImageDownload;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Set;

import javax.net.ssl.HttpsURLConnection;
import org.w3c.dom.*;

/**
 * Small crawler that downloads a Google Images result page, saves every
 * sufficiently large {@code <img>} found on it, and then follows the page's
 * links up to a fixed depth.
 *
 * <p>HTML-to-XML conversion and XML parsing are delegated to the helpers
 * {@code light_html2xml} and {@code DocumentObjectClass}, which are defined
 * outside this file.
 *
 * <p>Instances keep crawl state (visited URLs, image counter) and are not
 * designed for concurrent use.
 */
public class HttpURLConnectionExample {

    private static final String USER_AGENT = "Chrome/44.0.2403.157";

    /** Images are saved only when both width and height exceed this many pixels. */
    private static final int MIN_IMAGE_SIZE = 10;

    /** Maximum number of link hops from the start page; bounds the recursion so the crawl ends. */
    private static final int MAX_DEPTH = 1;

    /** URLs already fetched by this instance, so no page is downloaded twice. */
    private final Set<String> visited = new HashSet<>();

    /** Running count of saved images; used as the file name so pages do not overwrite each other. */
    private int savedImages;

    public static void main(String[] args) throws Exception {

        HttpURLConnectionExample http = new HttpURLConnectionExample();

        System.out.println("Testing 1 - Send Http GET request");
        String url = "https://www.google.co.in/search?tbm=isch&q=test";

        http.sendGet(url);
    }

    /**
     * Fetches {@code url} with an HTTP GET, saves the images on the page and
     * follows its links.
     *
     * @param url absolute http(s) URL of the page to start from
     * @throws Exception if the start page cannot be fetched or parsed
     */
    private void sendGet(String url) throws Exception {
        crawl(url, 0);
    }

    /**
     * Fetches one page and processes its images and links.
     *
     * @param url   absolute URL of the page
     * @param depth number of link hops between the start page and this page
     * @throws Exception if the page cannot be fetched or parsed
     */
    private void crawl(String url, int depth) throws Exception {
        // Set.add returns false for a URL seen before, which breaks link cycles.
        if (depth > MAX_DEPTH || !visited.add(url)) {
            return;
        }

        URL pageUrl = new URL(url);
        if (!isHttp(pageUrl)) {
            throw new IllegalArgumentException("Only http/https URLs are supported: " + url);
        }
        // HttpURLConnection covers both http and https connections.
        HttpURLConnection con = (HttpURLConnection) pageUrl.openConnection();

        // optional, default is GET
        con.setRequestMethod("GET");

        // add request header
        con.setRequestProperty("User-Agent", USER_AGENT);

        int responseCode = con.getResponseCode();
        System.out.println("\nSending 'GET' request to URL : " + url);
        System.out.println("Response Code : " + responseCode);

        StringBuilder response = new StringBuilder();
        // NOTE(review): assumes the page is UTF-8 encoded; confirm against the Content-Type header.
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8))) {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                response.append(inputLine);
            }
        }

        String xml = light_html2xml.Html2Xml(response.toString());
        Document document = DocumentObjectClass.convertStringToDocument(xml);
        if (document == null) {
            // Defensive: the helper's contract is not visible from this file.
            return;
        }

        saveImages(document, pageUrl);
        if (depth < MAX_DEPTH) {
            followLinks(document, pageUrl, depth);
        }
    }

    /** Saves every {@code <img>} in {@code document} whose declared size exceeds the minimum. */
    private void saveImages(Document document, URL pageUrl) {
        NodeList images = document.getElementsByTagName("img");
        for (int i = 0; i < images.getLength(); i++) {
            Node node = images.item(i);
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element img = (Element) node;
            if (parsePixels(img.getAttribute("height")) <= MIN_IMAGE_SIZE
                    || parsePixels(img.getAttribute("width")) <= MIN_IMAGE_SIZE) {
                continue;
            }

            String src = img.getAttribute("src");
            System.out.println(src);
            try {
                // Resolve against the page URL so relative src values work too.
                saveImage(new URL(pageUrl, src).toString(), String.valueOf(savedImages));
                savedImages++;
            } catch (Exception e) {
                // Best effort: one broken image must not stop the rest of the page.
                System.err.println("Could not save image " + src + ": " + e);
            }
        }
    }

    /** Crawls every http(s) link found in {@code document}. */
    private void followLinks(Document document, URL pageUrl, int depth) {
        NodeList anchors = document.getElementsByTagName("a");
        for (int i = 0; i < anchors.getLength(); i++) {
            Node node = anchors.item(i);
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            String href = ((Element) node).getAttribute("href");
            // The DOM returns "" for a missing attribute; "#..." only points into the same page.
            if (href == null || href.isEmpty() || href.startsWith("#")) {
                continue;
            }
            try {
                URL target = new URL(pageUrl, href);
                if (isHttp(target)) {
                    crawl(target.toString(), depth + 1);
                }
            } catch (Exception e) {
                // Best effort: a dead or malformed link must not stop the crawl.
                System.err.println("Could not follow link " + href + ": " + e);
            }
        }
    }

    /** Returns whether {@code url} uses the http or https protocol. */
    private static boolean isHttp(URL url) {
        String protocol = url.getProtocol();
        return "http".equals(protocol) || "https".equals(protocol);
    }

    /**
     * Parses a size attribute such as {@code "120"} or {@code "120px"}.
     *
     * @return the pixel count, or {@code -1} when the value is missing or not a number
     */
    private static int parsePixels(String value) {
        if (value == null) {
            return -1;
        }
        try {
            return Integer.parseInt(value.replace("px", "").trim());
        } catch (NumberFormatException ignored) {
            // Absent or non-numeric size: report "unknown" so the caller skips the image.
            return -1;
        }
    }

    /**
     * Downloads {@code imageUrl} into the current working directory as
     * {@code <name>.jpg}, overwriting any existing file. The {@code .jpg}
     * extension is used regardless of the actual image format.
     *
     * @param imageUrl absolute URL of the image
     * @param name     file name without extension
     * @throws IOException if the URL is malformed or the download or write fails
     */
    public static void saveImage(String imageUrl, String name) throws IOException {
        URL url = new URL(imageUrl);

        String destName = new File(".").getAbsolutePath() + "/" + name + ".jpg";
        System.out.println(destName);

        // Both streams are closed even when reading or writing fails.
        try (InputStream is = url.openStream();
                OutputStream os = new FileOutputStream(destName)) {
            byte[] buffer = new byte[2048];
            int length;
            while ((length = is.read(buffer)) != -1) {
                os.write(buffer, 0, length);
            }
        }
    }
}