如何从网页上获取运动员的头像

时间:2012-02-03 06:24:27

标签: android html jsoup

我想根据用户的选择显示运动员的照片。我正在使用 Jsoup 获取统计数据等其他信息……但我遇到了困难,因为每个球员页面的 html 地址都不一样。我可以在地址中提供球员的名字,但我不知道所选的每个球员的 ID。有什么想法吗?

HTML示例:http://www.rotoworld.com/player/nba/784/zach-randolph/1

选择球员时,我可以传入他的名字(zach-randolph),但无法传入他的编号(784)……

更新 - 也许我可以用球员的名字做一次谷歌图片搜索,然后取第一个结果?有人知道该怎么做吗?

CODE:

public class ImagetestdroidActivity extends Activity {
/** Called when the activity is first created. */
ImageView image = null;
//ImageView image2 = null;
/**
 * Sets up the layout, looks up the image view and starts a player search for a
 * hard-coded player name.
 *
 * @param savedInstanceState state bundle handed to {@code super.onCreate}
 */
@Override
public void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.main);
    image = (ImageView) findViewById(R.id.imageView1);

    GetPlayerTask searchTask = new GetPlayerTask();

    // Split the full name on runs of spaces: index 0 is the first name, index 1 the last name.
    String[] nameParts = "Lebron James".split("[ ]+");
    int position = 0;
    for (String part : nameParts) {
        System.out.println("Token" + position + ":" + part);
        position++;
    }

    // Arguments: first name, last name, sport category.
    searchTask.execute(nameParts[0], nameParts[1], "nba");
}

/**
 * Downloads a player page and extracts the absolute URL of the player's photo.
 *
 * <p>The photo is the first {@code img} element that is a direct child of
 * {@code div.playerphoto}.
 *
 * @param html URL of the player page to download (a URL, not markup, despite the name)
 * @return the absolute image URL, or {@code null} when {@code html} is null/empty, the page
 *         could not be downloaded, or no photo with a resolvable {@code src} was found
 * @throws IOException kept in the signature for existing callers; download failures are
 *         logged and reported as {@code null} rather than thrown
 */
public String getimage(String html) throws IOException
{
    if (html == null || html.length() == 0) {
        return null;
    }
    try {
        Document doc = Jsoup.connect(html).get();
        // first() returns null when the selector matches nothing, so it must be checked
        // before any attribute is read.
        Element photo = doc.select("div.playerphoto > img").first();
        if (photo == null) {
            Log.w("ImagetestdroidActivity", "No player photo found at " + html);
            return null;
        }
        // "abs:src" resolves the src attribute against the page's base URI.
        String imgSrcAbs = photo.attr("abs:src");
        System.out.println(imgSrcAbs);
        // Plain "src" is the attribute value exactly as written in the page.
        String imgSrcRelative = photo.attr("src");
        System.out.println(imgSrcRelative);
        // An empty result means the src could not be resolved; report it as "not found".
        return imgSrcAbs.length() == 0 ? null : imgSrcAbs;
    } catch (IOException ex) {
        Log.e("ImagetestdroidActivity", "Unable to download player page " + html, ex);
    }

    return null;
}
/**
 * Downloads the photo of the player whose page was found by the search and shows it in the
 * {@code image} view.
 *
 * <p>This method is invoked from {@code GetPlayerTask.onPostExecute}, which Android runs on
 * the UI thread. The page scrape and the image download are therefore moved to a worker
 * thread; only the final {@code setImageBitmap} call is posted back with
 * {@code runOnUiThread}.
 *
 * @param result location of the player page as returned by the search, e.g.
 *        {@code "/player/nba/1614/gerald-henderson"}; {@code null} or empty means the search
 *        found nothing, in which case nothing is downloaded
 * @throws IOException never thrown by this implementation; kept in the signature so existing
 *         callers that catch it still compile
 */
public void onPlayerFound(final String result) throws IOException {
    System.out.println("in ONPLAYERFOUNFD"+result);
    if (result == null || result.length() == 0) {
        Log.w("ImagetestdroidActivity", "No player page to load a photo from");
        return;
    }

    final String pageUrl;
    if (result.startsWith("http://") || result.startsWith("https://")) {
        // A redirect location may already be an absolute URL.
        pageUrl = result + "/1";
    } else {
        // Drop a leading slash so the host and path are not joined with "//".
        String path = result.startsWith("/") ? result.substring(1) : result;
        pageUrl = "http://www.rotoworld.com/" + path + "/1";
    }

    new Thread(new Runnable() {
        @Override
        public void run() {
            final Bitmap photo = downloadPlayerPhoto(pageUrl);
            if (photo == null) {
                return;
            }
            // Views may only be touched from the UI thread.
            runOnUiThread(new Runnable() {
                @Override
                public void run() {
                    image.setImageBitmap(photo);
                }
            });
        }
    }).start();
}

/**
 * Resolves the photo URL from the player page and downloads the bitmap. Blocking; must not
 * be called on the UI thread.
 *
 * @param pageUrl absolute URL of the player page
 * @return the decoded photo, or {@code null} if it could not be located, downloaded or decoded
 */
private Bitmap downloadPlayerPhoto(String pageUrl) {
    HttpURLConnection conn = null;
    InputStream is = null;
    try {
        String imagehtml = getimage(pageUrl);
        System.out.println("IMG HTML "+imagehtml);
        if (imagehtml == null || imagehtml.length() == 0) {
            return null;
        }
        conn = (HttpURLConnection) new URL(imagehtml).openConnection();
        is = conn.getInputStream();
        return BitmapFactory.decodeStream(is);
    } catch (IOException e) {
        // MalformedURLException is an IOException, so bad URLs are covered here as well.
        Log.e("ImagetestdroidActivity", "Unable to download player photo from " + pageUrl, e);
        return null;
    } finally {
        if (is != null) {
            try {
                is.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing the stream fails.
            }
        }
        if (conn != null) {
            conn.disconnect();
        }
    }
}

/**
 * Background task that imitates the rotoworld.com player search form and reports the
 * location of the matching player page.
 *
 * <p>Execute with exactly three arguments: first name, last name and sport category
 * (for example {@code "nba"}). A successful search answers with a 302 redirect whose
 * {@code Location} header is the player page; that value is handed to
 * {@code onPlayerFound}. The task owns its HTTP client and closes it when the request is
 * finished.
 */
class GetPlayerTask extends AsyncTask<String, Void, String> {
    private static final String TAG = "GetPlayerTask";
    // Browser-like User-Agent, in case the site treats unknown clients differently.
    AndroidHttpClient   mClient = AndroidHttpClient.newInstance(
            "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.24) Gecko/20111107 Ubuntu/10.04 (lucid) Firefox/3.6.24");

    /**
     * Posts the search form and extracts the redirect target.
     *
     * @param params first name, last name, sport category
     * @return the {@code Location} header of the 302 response, or {@code null} if the
     *         arguments are incomplete, the request failed or the response was not a redirect
     */
    @Override
    protected String doInBackground(String... params) {
        String result = null;
        try {
            if (params == null || params.length < 3) {
                Log.e(TAG, "Expected first name, last name and sport category");
                return null;
            }

            String url = Uri.parse("http://www.rotoworld.com").buildUpon()
                    .appendEncodedPath("content/playersearch.aspx")
                    .appendQueryParameter("searchname", params[0] + " " + params[1])
                    .build().toString();
            HttpPost post = new HttpPost(url);
            post.addHeader("Referer", "http://www.rotoworld.com");

            List<NameValuePair> parameters = new ArrayList<NameValuePair>();
            parameters.add(new BasicNameValuePair("ctl00$cp1$btnAdvancedSearch", "Search"));
            // Fixed locale: the default locale could change how the category is upper-cased.
            parameters.add(new BasicNameValuePair("ctl00$cp1$radSportSearch", params[2].toUpperCase(java.util.Locale.US)));
            parameters.add(new BasicNameValuePair("ctl00$cp1$tbFirstNameSearch", params[0]));
            parameters.add(new BasicNameValuePair("ctl00$cp1$tbLastNameSearch", params[1]));
            parameters.add(new BasicNameValuePair("ctl00$cp1$tbHeaderSearchBox", "LAST NAME, FIRST NAME"));
            parameters.add(new BasicNameValuePair("ctl00$cp1$headlinesNFL$hideHeadlineSport", ""));
            parameters.add(new BasicNameValuePair("ctl00$cp1$siteheader$hidpage", ""));
            parameters.add(new BasicNameValuePair("__EVENTARGUMENT", ""));
            parameters.add(new BasicNameValuePair("__EVENTTARGET", ""));
            // NOTE(review): these two tokens look like values captured from one browser
            // session; presumably they stop working if the site regenerates them - confirm.
            parameters.add(new BasicNameValuePair("__EVENTVALIDATION", "/wEWEALJp4KIBAKHlvL3BgLA+sClCQK5vLryBgKn1MPhBAK9kM36BQKj89HmAwLA+vrmBAKk7ayNDgKj85nnAwKU87XnAwKurM6nDAK++qLmBAKD2r2iBgKQ+47mAgK//t/aB6qbH1ovSUf6LkMO7LTmIW5EbRu5"));
            parameters.add(new BasicNameValuePair("__VIEWSTATE", "/wEPDwUJMjg1NjcxOTA2D2QWAmYPZBYEAgEPZBYCAhwPFgIeBFRleHQF3AE8c2NyaXB0IGxhbmd1YWdlPSdqYXZhc2NyaXB0JyB0eXBlPSd0ZXh0L2phdmFzY3JpcHQnIHNyYz0naHR0cDovL2FqYXguZ29vZ2xlYXBpcy5jb20vYWpheC9saWJzL2pxdWVyeS8xLjQuMi9qcXVlcnkubWluLmpzJz48L3NjcmlwdD48c2NyaXB0IGxhbmd1YWdlPSdqYXZhc2NyaXB0JyB0eXBlPSd0ZXh0L2phdmFzY3JpcHQnIHNyYz0nL3psaWJzL2ZseW91dG5hdi5qcyc+PC9zY3JpcHQ+ZAIDD2QWAgIBD2QWAgIDD2QWBAIFD2QWBgIBDxYCHwAFDVRvcCBIZWFkbGluZXNkAgIPDxYCHgdWaXNpYmxlaGRkAgQPD2QPEBYBZhYBFgIeDlBhcmFtZXRlclZhbHVlZRYBZmRkAgsPZBYCAgEPFgIfAAUbQ2hhbXBpb25zaGlwIEV2ZW50cyBUaWNrZXRzZGRb2hpHPON4Q4VOuUHYhpgRZg0o4Q=="));
            post.setEntity(new UrlEncodedFormEntity(parameters));

            HttpResponse response = mClient.execute(post);
            if (response.getStatusLine().getStatusCode() == 302) {
                Header location = response.getFirstHeader("Location");
                if (location != null) {
                    result = location.getValue();
                }
            }
        } catch (IOException e) {
            Log.e(TAG, "Unable perform request", e);
        } finally {
            // The client is created per task and a task runs only once, so release it here.
            mClient.close();
        }

        return result;
    }

    /**
     * Forwards the player page location to the activity; does nothing when the search
     * produced no location.
     *
     * @param result value returned by {@code doInBackground}
     */
    @Override
    protected void onPostExecute(String result) {
        try {
            System.out.println("getplayer"+result);
            if (result == null) {
                Log.w(TAG, "Player search did not return a player page");
                return;
            }
            onPlayerFound(result);
        } catch (IOException e) {
            Log.e(TAG, "Unable to show the player photo", e);
        }
    }
}

2 个答案:

答案 0 :(得分:0)

您正在抓取的网站上有一个搜索功能,您可以使用HttpClient和HttpPost进行模拟,如以下示例所示。

首先,使用HttpClient - 在本例中为AndroidHttpClient。我们模仿真实浏览器的User-Agent以防万一:

mClient = AndroidHttpClient.newInstance(
            "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.24) Gecko/20111107 Ubuntu/10.04 (lucid) Firefox/3.6.24", this);

然后 - 将执行网络操作的AsyncTask:

/**
 * Background task that imitates the rotoworld.com player search form and reports the
 * location of the matching player page.
 *
 * <p>Execute with exactly three arguments: first name, last name and sport category
 * (for example {@code "nba"}). A successful search answers with a 302 redirect whose
 * {@code Location} header is the player page; that value is handed to
 * {@code onPlayerFound}. The HTTP client {@code mClient} and the log tag {@code TAG} are
 * expected to be provided by the enclosing class.
 */
class GetPlayerTask extends AsyncTask<String, Void, String> {
    /**
     * Posts the search form and extracts the redirect target.
     *
     * @param params first name, last name, sport category
     * @return the {@code Location} header of the 302 response, or {@code null} if the
     *         arguments are incomplete, the request failed or the response was not a redirect
     */
    @Override
    protected String doInBackground(String... params) {
        String result = null;
        if (params == null || params.length < 3) {
            Log.e(TAG, "Expected first name, last name and sport category");
            return null;
        }

        String url = Uri.parse("http://www.rotoworld.com").buildUpon()
                .appendEncodedPath("content/playersearch.aspx")
                .appendQueryParameter("searchname", params[0] + " " + params[1])
                .build().toString();
        HttpPost post = new HttpPost(url);
        post.addHeader("Referer", "http://www.rotoworld.com");

        try {
            List<NameValuePair> parameters = new ArrayList<NameValuePair>();
            parameters.add(new BasicNameValuePair("ctl00$cp1$btnAdvancedSearch", "Search"));
            // Fixed locale: the default locale could change how the category is upper-cased.
            parameters.add(new BasicNameValuePair("ctl00$cp1$radSportSearch", params[2].toUpperCase(java.util.Locale.US)));
            parameters.add(new BasicNameValuePair("ctl00$cp1$tbFirstNameSearch", params[0]));
            parameters.add(new BasicNameValuePair("ctl00$cp1$tbLastNameSearch", params[1]));
            parameters.add(new BasicNameValuePair("ctl00$cp1$tbHeaderSearchBox", "LAST NAME, FIRST NAME"));
            parameters.add(new BasicNameValuePair("ctl00$cp1$headlinesNFL$hideHeadlineSport", ""));
            parameters.add(new BasicNameValuePair("ctl00$cp1$siteheader$hidpage", ""));
            parameters.add(new BasicNameValuePair("__EVENTARGUMENT", ""));
            parameters.add(new BasicNameValuePair("__EVENTTARGET", ""));
            // NOTE(review): these two tokens look like values captured from one browser
            // session; presumably they stop working if the site regenerates them - confirm.
            parameters.add(new BasicNameValuePair("__EVENTVALIDATION", "/wEWEALJp4KIBAKHlvL3BgLA+sClCQK5vLryBgKn1MPhBAK9kM36BQKj89HmAwLA+vrmBAKk7ayNDgKj85nnAwKU87XnAwKurM6nDAK++qLmBAKD2r2iBgKQ+47mAgK//t/aB6qbH1ovSUf6LkMO7LTmIW5EbRu5"));
            parameters.add(new BasicNameValuePair("__VIEWSTATE", "/wEPDwUJMjg1NjcxOTA2D2QWAmYPZBYEAgEPZBYCAhwPFgIeBFRleHQF3AE8c2NyaXB0IGxhbmd1YWdlPSdqYXZhc2NyaXB0JyB0eXBlPSd0ZXh0L2phdmFzY3JpcHQnIHNyYz0naHR0cDovL2FqYXguZ29vZ2xlYXBpcy5jb20vYWpheC9saWJzL2pxdWVyeS8xLjQuMi9qcXVlcnkubWluLmpzJz48L3NjcmlwdD48c2NyaXB0IGxhbmd1YWdlPSdqYXZhc2NyaXB0JyB0eXBlPSd0ZXh0L2phdmFzY3JpcHQnIHNyYz0nL3psaWJzL2ZseW91dG5hdi5qcyc+PC9zY3JpcHQ+ZAIDD2QWAgIBD2QWAgIDD2QWBAIFD2QWBgIBDxYCHwAFDVRvcCBIZWFkbGluZXNkAgIPDxYCHgdWaXNpYmxlaGRkAgQPD2QPEBYBZhYBFgIeDlBhcmFtZXRlclZhbHVlZRYBZmRkAgsPZBYCAgEPFgIfAAUbQ2hhbXBpb25zaGlwIEV2ZW50cyBUaWNrZXRzZGRb2hpHPON4Q4VOuUHYhpgRZg0o4Q=="));
            post.setEntity(new UrlEncodedFormEntity(parameters));

            HttpResponse response = mClient.execute(post);
            try {
                if (response.getStatusLine().getStatusCode() == 302) {
                    Header location = response.getFirstHeader("Location");
                    if (location != null) {
                        result = location.getValue();
                    }
                }
            } finally {
                // mClient is shared, so the response body must be consumed to hand the
                // connection back for the next request.
                if (response.getEntity() != null) {
                    response.getEntity().consumeContent();
                }
            }
        } catch (IOException e) {
            Log.e(TAG, "Unable perform request", e);
        }

        return result;
    }

    /**
     * Notifies the activity that the player page was found; does nothing when the search
     * produced no location.
     *
     * @param result value returned by {@code doInBackground}
     */
    @Override
    protected void onPostExecute(String result) {
        if (result == null) {
            Log.w(TAG, "Player search did not return a player page");
            return;
        }
        // Call whatever method you want to notify your Activity
        // that you've found the player in question
        onPlayerFound(result);
    }
}

上面的 BasicNameValuePair 参数是用 FireBug 在搜索页面上尝试了 2-3 次后抓取到的。“成功搜索”的响应是一条重定向消息(302),其中包含球员页面的位置——也就是下载图片所需的标识符。

GetPlayerTask将像这样使用:

    GetPlayerTask fetch = new GetPlayerTask();
    // FirstName, LastName, Category (NBA/etc./etc.)
    fetch.execute("Gerald", "Henderson", "nba");

这将返回相对于www.rotoworld.com的路径,例如

"/player/nba/1614/gerald-henderson"

答案 1 :(得分:0)

试试这个:

// Downloads a player page and prints the absolute and relative URL of the player's photo.
String url = "http://www.rotoworld.com/player/nba/784/zach-randolph/1";
try {
    Document doc = Jsoup.connect(url).get();
    // first() returns null when the selector matches nothing, so check before reading attributes.
    Element photo = doc.select("div.playerphoto > img").first();
    if (photo == null) {
        System.err.println("No player photo found at " + url);
    } else {
        // If you want absolute path
        String imgSrcAbs = photo.attr("abs:src");
        System.out.println(imgSrcAbs);
        // Or , If you want relative path
        String imgSrcRelative = photo.attr("src");
        System.out.println(imgSrcRelative);
    }
} catch (IOException ex) {
    // Report the failure instead of silently ignoring it.
    System.err.println("Unable to download " + url + ": " + ex);
}