谷歌页面排名java api

时间:2013-01-03 07:34:32

标签: java api pagerank

我想获得给定域名的Google网页排名。

我之前使用下面这段调用 Google API 的代码来获取 PageRank,并且能够得到页面排名。但今天再次运行同一个文件时出现了问题,无法获得正确的页面排名。

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;

/**
 * <b>PageRankService provides simple API to Google PageRank Technology</b>
 * <br>
 * PageRankService queries google toolbar webservice and returns a
 * google page rank retrieved from one of the next datacenters on the list.
 * <br>toolbarqueries.google.com
 * <br>64.233.161.100
 * <br>64.233.161.101
 * <br>64.233.177.17
 * <br>64.233.183.91
 * <br>64.233.185.19
 * <br>64.233.189.44
 * <br>66.102.1.103
 * <br>66.102.9.115
 * <br>66.249.81.101
 * <br>66.249.89.83
 * <br>66.249.91.99
 * <br>66.249.93.190
 * <br>72.14.203.107
 * <br>72.14.205.113
 * <br>72.14.255.107
 */
public class PageRankService {

    /** Maximum time, in milliseconds, to wait for the connection to be established. */
    private static final int CONNECT_TIMEOUT_MS = 10000;

    /** Maximum time, in milliseconds, to wait for response data once connected. */
    private static final int READ_TIMEOUT_MS = 10000;

    /**
     * Index into {@link #GOOGLE_PR_DATACENTER_IPS} of the host the next lookup will use.
     * Shared by all instances and advanced without synchronization, so this class is
     * not safe for concurrent use.
     */
    static private int dataCenterIdx = 0;

    /**
     * List of available google datacenter IPs and addresses
     * <p>
     * NOTE(review): these addresses are hard-coded and may be stale; verify them
     * before relying on the rotation.
     */
    static final public String [] GOOGLE_PR_DATACENTER_IPS = new String[]{
                "64.233.161.100",
                "64.233.161.101",
                "64.233.177.17",
                "64.233.183.91",
                "64.233.185.19",
                "64.233.189.44",
                "66.102.1.103",
                "66.102.9.115",
                "66.249.81.101",
                "66.249.89.83",
                "66.249.91.99",
                "66.249.93.190",
                "72.14.203.107",
                "72.14.205.113",
                "72.14.255.107",
                "toolbarqueries.google.com",
                };

    /**
     * Default constructor
     */
    public PageRankService() {

    }

    /**
     * Queries the next datacenter in the rotation for the page rank of a domain.
     * <p>
     * Must receive a domain in form of: "http://www.domain.com"
     *
     * @param domain - (String) the domain to look up; a null or blank value yields -1
     *               without issuing a request
     * @return PR rating (int) or -1 if unavailable or internal error happened.
     */
    public int getPR(String domain) {

        if (domain == null || domain.trim().length() == 0) {
            return -1;
        }

        String host = nextDataCenter();

        // The request declares ie=UTF-8, so hash the same byte representation
        // instead of whatever the platform default charset happens to be.
        JenkinsHash jHash = new JenkinsHash();
        long hash = jHash.hash(("info:" + domain).getBytes(StandardCharsets.UTF_8));

        String url = "http://"+host+"/search?client=navclient-auto&hl=en&"+
                "ch=6"+hash+"&ie=UTF-8&oe=UTF-8&features=Rank&q=info:" + domain;

        try {
            URLConnection con = new URL(url).openConnection();
            // Without explicit timeouts an unresponsive host can stall the caller
            // for as long as the platform defaults allow.
            con.setConnectTimeout(CONNECT_TIMEOUT_MS);
            con.setReadTimeout(READ_TIMEOUT_MS);
            return parseRank(readFully(con));
        } catch (IOException e) {
            System.err.println("PageRank lookup failed for " + url + ": " + e);
            return -1;
        } catch (RuntimeException e) {
            // Keeps the documented contract: any internal error is reported as -1.
            System.err.println("PageRank lookup failed for " + url + ": " + e);
            return -1;
        }

    }

    /**
     * Returns the host for the current lookup and advances the rotation,
     * wrapping to the first entry after the last one.
     */
    private static String nextDataCenter() {
        String host = GOOGLE_PR_DATACENTER_IPS[dataCenterIdx];
        dataCenterIdx = (dataCenterIdx + 1) % GOOGLE_PR_DATACENTER_IPS.length;
        return host;
    }

    /**
     * Reads the whole response body as UTF-8 text and closes the stream.
     *
     * @throws IOException if the connection fails or the server answers with an error status
     */
    private static String readFully(URLConnection con) throws IOException {
        try (InputStream is = con.getInputStream()) {
            ByteArrayOutputStream body = new ByteArrayOutputStream();
            byte [] buff = new byte[1024];
            int read;
            // Accumulate every chunk; a response may arrive in more than one read.
            while ((read = is.read(buff)) != -1) {
                body.write(buff, 0, read);
            }
            return new String(body.toByteArray(), StandardCharsets.UTF_8);
        }
    }

    /**
     * Extracts the rank from the third colon-separated field of the response.
     *
     * @return the parsed rank, or -1 when the field is missing or not an integer
     */
    private static int parseRank(String response) {
        String [] fields = response.split(":");
        if (fields.length < 3) {
            return -1;
        }
        try {
            return Integer.parseInt(fields[2].trim());
        } catch (NumberFormatException e) {
            return -1;
        }
    }

    /**
     * Command-line entry point: looks up the page rank of the first argument,
     * or of "http://www.gmail.com" when no argument is given, and prints the
     * result together with the elapsed time.
     */
    public static void main(String [] args) {
        long start = System.currentTimeMillis();
        PageRankService prService = new PageRankService();
        String domain = "http://www.gmail.com";
        if (args.length > 0) {
            domain = args[0];
        }
        System.out.println("Checking " + domain);
        System.out.println("Google PageRank: " + prService.getPR(domain));
        System.out.println("Took: " + (System.currentTimeMillis() - start) + "ms");
    }
}

但今天我无法获得网页排名。我收到以下错误:

Checking http://www.google.com
Google PageRank: -1
java.io.IOException: Server returned HTTP response code: 504 for URL: http://64.233.161.100/search?client=navclient-auto&hl=en&ch=63513778613&ie=UTF-8&oe=UTF-8&features=Rank&q=info:http://www.google.com
Took: 179711ms
    at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1625)
    at PageRankService.getPR(PageRankService.java:82)
    at PageRankService.main(PageRankService.java:112)
BUILD SUCCESSFUL (total time: 3 minutes 0 seconds)

我哪里出错了?

1 个答案:

答案 0 :(得分:0)

我不是专家,但你可以尝试使用 http://toolbarqueries.google.com/tbr? 来代替 /search。