我想抓取谷歌图片搜索返回的结果。谷歌是否提供了相应的工具?我正在构建一个物体识别系统,需要各种主题的训练样本。
答案 0 :(得分:2)
这可能对您有用,因为Google已弃用其搜索API:
Google 自定义搜索可让您在单个网站或一组网站中进行搜索。借助 Google 的强大功能,创建根据您的需求和兴趣量身定制的搜索引擎,并在您的网站上展示搜索结果。您的自定义搜索引擎可以根据您指定的网站,对搜索结果进行优先排序或加以限制。
答案 1 :(得分:2)
您可以使用谷歌的 Image API,例如:
// Query the Google AJAX Image Search endpoint and decode its JSON response.
// NOTE(review): this endpoint is reported to be deprecated; confirm it still
// responds before relying on it.
$query = "stackoverflow";
$url = "https://ajax.googleapis.com/ajax/services/search/images?v=1.0&q=" . urlencode($query);

// The referer is set manually: replace this placeholder with the URL of your own site.
$referer = "http://www.example.com/";

// sendRequest
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
// Return the response body from curl_exec() instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_REFERER, $referer);
$body = curl_exec($ch);
if ($body === false) {
    // Capture the error text before the handle is released.
    $error = curl_error($ch);
    curl_close($ch);
    throw new RuntimeException("Image search request failed: " . $error);
}
curl_close($ch);

// now, process the JSON string
$json = json_decode($body);
if ($json === null) {
    throw new RuntimeException("Image search response was not valid JSON");
}
// now have some fun with the results...
更多信息:https://developers.google.com/image-search/v1/jsondevguide#json_snippets_php
答案 2 :(得分:0)
package GoogleImageDownload;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.HashSet;
import java.util.Set;
import javax.net.ssl.HttpsURLConnection;
import org.w3c.dom.*;
/**
 * Small crawler example: fetches a page over HTTP(S), saves the images it
 * references, and follows the page's links up to a fixed depth.
 *
 * <p>HTML is converted to XML and parsed through the project helpers
 * {@code light_html2xml} and {@code DocumentObjectClass}.
 */
public class HttpURLConnectionExample {

    /** User-Agent header sent with every page request. */
    private static final String USER_AGENT = "Chrome/44.0.2403.157";

    /** Maximum number of link hops followed from the start page. */
    private static final int MAX_DEPTH = 1;

    /** An image is saved only if both its height and width exceed this many pixels. */
    private static final int MIN_IMAGE_SIZE = 10;

    /** Pages already requested, so that link cycles cannot loop forever. */
    private final Set<String> visited = new HashSet<String>();

    /** Running counter used to give every saved image a distinct file name. */
    private int savedImages = 0;

    public static void main(String[] args) throws Exception {
        HttpURLConnectionExample http = new HttpURLConnectionExample();
        System.out.println("Testing 1 - Send Http GET request");
        String url = "https://www.google.co.in/search?tbm=isch&q=test";
        http.sendGet(url);
        // The POST example is announced but not implemented in this class.
        System.out.println("\nTesting 2 - Send Http POST request");
    }

    /**
     * Fetches {@code url} with an HTTP GET, saves its images and crawls its links.
     *
     * @param url absolute http/https URL of the start page
     * @throws Exception if the page cannot be fetched or converted
     */
    private void sendGet(String url) throws Exception {
        sendGet(url, 0);
    }

    /**
     * Fetches one page at the given crawl depth; pages already visited are skipped.
     *
     * @param url   absolute http/https URL of the page
     * @param depth number of link hops between the start page and this page
     * @throws Exception if the page cannot be fetched or converted
     */
    private void sendGet(String url, int depth) throws Exception {
        if (!visited.add(url)) {
            return;
        }
        URL pageUrl = new URL(url);
        // HttpURLConnection covers both http and https; casting to the https
        // subtype would fail on plain-http links.
        HttpURLConnection con = (HttpURLConnection) pageUrl.openConnection();
        // optional, default is GET
        con.setRequestMethod("GET");
        con.setRequestProperty("User-Agent", USER_AGENT);
        int responseCode = con.getResponseCode();
        System.out.println("\nSending 'GET' request to URL : " + url);
        System.out.println("Response Code : " + responseCode);

        String html = readBody(con);
        String xml = light_html2xml.Html2Xml(html);
        Document document = DocumentObjectClass.convertStringToDocument(xml);
        if (document == null) {
            // Defensive guard; TODO confirm whether the project helper can return null.
            return;
        }
        saveImages(document, pageUrl);
        followLinks(document, pageUrl, depth);
    }

    /** Reads the whole response body, concatenating its lines without separators. */
    private static String readBody(HttpURLConnection con) throws IOException {
        StringBuilder response = new StringBuilder();
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(con.getInputStream()))) {
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                response.append(inputLine);
            }
        }
        return response.toString();
    }

    /** Saves every sufficiently large {@code <img>} of the document; failures are logged and skipped. */
    private void saveImages(Document document, URL pageUrl) {
        NodeList images = document.getElementsByTagName("img");
        for (int i = 0; i < images.getLength(); i++) {
            Node node = images.item(i);
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element elem = (Element) node;
            if (parsePixels(elem.getAttribute("height")) <= MIN_IMAGE_SIZE
                    || parsePixels(elem.getAttribute("width")) <= MIN_IMAGE_SIZE) {
                continue;
            }
            String src = elem.getAttribute("src");
            System.out.println(src);
            // Take the name before the download so a failed attempt never shares
            // its file name with a later image.
            String name = String.valueOf(savedImages++);
            try {
                // Resolve against the page so relative src values work too.
                saveImage(new URL(pageUrl, src).toString(), name);
            } catch (IOException | RuntimeException e) {
                System.err.println("Could not save image " + src + ": " + e);
            }
        }
    }

    /** Crawls every http/https link of the document unless the depth limit is reached. */
    private void followLinks(Document document, URL pageUrl, int depth) {
        if (depth >= MAX_DEPTH) {
            return;
        }
        NodeList anchors = document.getElementsByTagName("a");
        for (int i = 0; i < anchors.getLength(); i++) {
            Node node = anchors.item(i);
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            // DOM returns "" (not null) for a missing attribute.
            String href = ((Element) node).getAttribute("href");
            if (href == null || href.isEmpty()) {
                continue;
            }
            try {
                URL target = new URL(pageUrl, href);
                String protocol = target.getProtocol();
                if (!"http".equals(protocol) && !"https".equals(protocol)) {
                    continue;
                }
                // Drop the fragment: "#section" links point at the same page.
                String link = target.toString();
                int fragment = link.indexOf('#');
                if (fragment >= 0) {
                    link = link.substring(0, fragment);
                }
                sendGet(link, depth + 1);
            } catch (Exception e) {
                System.err.println("Could not crawl " + href + ": " + e);
            }
        }
    }

    /**
     * Parses a pixel dimension such as {@code "120"} or {@code "120px"}.
     *
     * @return the parsed value, or -1 when the attribute is missing or not numeric
     */
    private static int parsePixels(String value) {
        if (value == null) {
            return -1;
        }
        String digits = value.replace("px", "").trim();
        if (digits.isEmpty()) {
            return -1;
        }
        try {
            return Integer.parseInt(digits);
        } catch (NumberFormatException e) {
            return -1;
        }
    }

    /**
     * Downloads {@code imageUrl} into {@code <working dir>/<name>.jpg}.
     *
     * @param imageUrl absolute URL of the image
     * @param name     file name without extension
     * @throws IOException if the URL is malformed, or reading or writing fails
     */
    public static void saveImage(String imageUrl, String name) throws IOException {
        URL url = new URL(imageUrl);
        String destName = new File(".").getAbsolutePath() + "/" + name + ".jpg";
        System.out.println(destName);
        // Both streams are closed even when the copy fails part-way.
        try (InputStream is = url.openStream();
             OutputStream os = new FileOutputStream(destName)) {
            byte[] buffer = new byte[2048];
            int length;
            while ((length = is.read(buffer)) != -1) {
                os.write(buffer, 0, length);
            }
        }
    }
}