使用JSOUP从HTML表中提取数据

时间:2015-10-08 23:06:58

标签: java jsoup espn

我现在正试图从(http://games.espn.go.com/ffl/leaders?)抓取所有数据并将其存储起来。我目前采用的方法是抓取整个网页,然后从中提取所有数据。然而,在发现这种做法非常低效之后,我开始对JSOUP进行一些研究。我找到了一篇关于使用JSOUP处理espn页面的stackoverflow帖子:《Using JSoup To Extract HTML Table Contents》。我尝试使用相同的方法,但我不知道如何从 http://games.espn.go.com/ffl/leaders 获取所有表格信息。非常感谢任何帮助!

1 个答案:

答案 0 :(得分:0)

// Download and parse the ESPN fantasy-football leaders page.
Document doc = Jsoup
        .connect("http://games.espn.go.com/ffl/leaders")
        .userAgent(
                "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36")
        .ignoreContentType(true)
        // In jsoup a timeout of 0 means "wait forever"; use a finite limit (30 s)
        // so that a stalled connection cannot hang the program indefinitely.
        .timeout(30 * 1000)
        .get();

// Locate the table whose class attribute is exactly "playerTableTable tableBody".
Elements elements = doc.select("table[class=playerTableTable tableBody]");

// Fail with a clear message instead of a bare IndexOutOfBoundsException when
// the table is missing (e.g. the page layout changed or an error page was returned).
if (elements.isEmpty()) {
    throw new IllegalStateException(
            "No table with class \"playerTableTable tableBody\" found at " + doc.location());
}

// Rows of interest are the <tr> elements whose class starts with "pncPlayerRow".
Elements rows = elements.get(0).select("tr[class^=pncPlayerRow]");

// Only rows with exactly this many <td> cells are processed; any other row is skipped.
final int expectedCellCount = 24;

// Output label for each extracted field (reproduced exactly, including padding),
// paired by position with the index of the <td> cell it is read from.
// The indices not listed (1, 4, 9, 13, 18, 22) are never read; presumably they
// are spacer columns between stat groups -- verify against the live markup.
final String[] fieldLabels = {
    "mPLAYERTEAMPOS\t\t\t",
    "mWK_OPP \t\t\t",
    "mWK_STATUSET\t\t\t",
    "mPASSING_CA\t\t\t",
    "mPASSING_YDS\t\t\t",
    "mPASSING_TD\t\t\t",
    "mPASSING_INT\t\t\t",
    "mRUSHING_RUSH\t\t\t",
    "mRUSHING_YDS\t\t\t",
    "mRUSHING_TD\t\t\t",
    "mRECEIVING_REC\t\t\t",
    "mRECEIVING_YDS\t\t\t",
    "mRECEIVING_TD\t\t\t",
    "mRECEIVING_TAR\t\t\t",
    "mMISC_2PC\t\t\t",
    "mMISC_FUML\t\t\t",
    "mMISC_TD\t\t\t",
    "mTOTAL_PTS\t\t\t"
};
final int[] fieldColumns = {
    0, 2, 3, 5, 6, 7, 8, 10, 11, 12, 14, 15, 16, 17, 19, 20, 21, 23
};

for (Element row : rows) {
    Elements tds = row.select("td");
    if (tds.size() != expectedCellCount) {
        continue;
    }

    // Print every field of this player row as "<label><cell text>", one per line.
    for (int i = 0; i < fieldLabels.length; i++) {
        System.out.println(fieldLabels[i] + tds.get(fieldColumns[i]).text());
    }
}