在Java网页抓取（Web Scraping）中排序和显示数据

时间:2017-12-24 09:54:17

标签: java jsoup

我是Java新手。我编写了一段Java代码，使用Jsoup库从网站上读取数据。

package view;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;


/**
 * Downloads the NSE NIFTY option-chain page with jsoup and dumps every
 * {@code <th>} and {@code <td>} element found in it to standard output.
 */
public class WebscrappingCls {

    /** Creates an instance; the class keeps no state and is driven through {@link #main}. */
    public WebscrappingCls() {
        super();
    }

    /**
     * Fetches the page, prints its title, and then prints two lines for each
     * header cell followed by two lines for each data cell. An
     * {@link IOException} raised while fetching is reported via its stack trace.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        final String baseURL = "https://www.nseindia.com/live_market/dynaContent/live_watch/option_chain/optionKeys.jsp?segmentLink=17&instrument=OPTIDX&symbol=NIFTY&date=25JAN2018";

        try {
            final Document page = Jsoup.connect(baseURL).get();
            System.out.println(page.title());

            // First loop: every header cell in the document.
            final Elements headerCells = page.select("th");
            for (Element headerCell : headerCells) {
                // NOTE(review): attr("th") asks for an attribute literally named "th",
                // not the tag itself; jsoup yields "" when the attribute is absent.
                // Confirm this is the intended key.
                final String headerAttr = headerCell.attr("th");
                System.out.println("\nlink : " + headerAttr);
                final String headerText = headerCell.text();
                System.out.println("text : " + headerText);
            }

            // Second loop: every data cell in the document.
            final Elements dataCells = page.select("td");
            for (Element dataCell : dataCells) {
                // NOTE(review): same pattern as above, with an attribute named "td".
                final String cellAttr = dataCell.attr("td");
                System.out.println("\nlink : " + cellAttr);
                final String cellText = dataCell.text();
                System.out.println("text : " + cellText);
            }
        } catch (IOException fetchFailure) {
            fetchFailure.printStackTrace();
        }
    }
}

我能够使用上述代码中的第一个for循环检索表头数据，但无法检索下图中所示的数据。

enter image description here

网站的HTML代码如下。

            <div class="opttbldata">
                <table id="octable" width="100%" border="0" cellpadding="0" cellspacing="0">
                <thead>
                    <tr>
                        <th colspan="11">CALLS</th>
                        <th>&nbsp;</th>
                        <th colspan="11">PUTS</th>
                    </tr>
                    <tr>
                        <!-- <th>Quote </th>   -->
                        <th title="Chart">Chart</th>
                        <th title="Open Interest">OI</th>
                        <th title="Change in Open Interest">Chng in OI</th>
                        <th title="Traded Volume">Volume</th>
                        <th title="Implied Volatility">IV</th>
                        <th title="Last Traded Price">LTP</th>
                        <!--**-->
                        <th title="Net Change">Net Chng</th>

                        <th title="Bid Quantity">Bid<br>Qty</th>
                        <th title="Bid Price">Bid<br>Price</th>
                        <th title="Ask Price">Ask<br>Price</th>
                        <th title="Ask Quantity">Ask<br>Qty</th>
                        <th title="Strike Price">Strike Price</th>
                        <th title="Bid Quantity">Bid<br>Qty</th>
                        <th title="Bid Price">Bid<br>Price</th>
                        <th title="Ask Price">Ask<br>Price</th>
                        <th title="Ask Quantity">Ask<br>Qty</th>
                        <th title="Net Change">Net Chng</th>                            
                        <th title="Last Traded Price">LTP</th>
                        <th title="Implied Volatility">IV</th>
                        <th title="Traded Volume">Volume</th>   
                        <th title="Change in Open Interest">Chng in OI</th>
                        <th title="Open Interest">OI</th>
                        <th title="Chart">Chart</th>
                    </tr>
                    </thead>


                    <tr>

<!--<td><a href="javascript:popup1('','','1')">Quote</a></td>
<td><a href="javascript:popup1('','','','','CE')"><img src="/images/print3.gif"></a>

</td>-->


                    <td><a href="javascript:chartPopup('NIFTY', 'OPTIDX', '28DEC2017', '3500.00','CE','NIFTY 50');"><img src="/live_market/resources/images/grficon.gif" alt="Graph" /></a></td>
                        <td class="ylwbg"> 316,500</td>
                        <td class="ylwbg"> -64,425</td>
                        <td class="ylwbg"> 864</td>
                        <!-- Added By Swapnil IV-->
                        <td class="ylwbg"> 332.90</td>
                        <!-- End-->
                        <td class="ylwbg">


                            <a href="/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=NIFTY&instrument=OPTIDX&strike=3500.00&type=CE&expiry=28DEC2017" target="_blank"> 7,002.75</a>

                        </td>
                        <!--*Net Change*-->

                        <td class="ylwbg" Style="color:Green;"> 37.05</td>

                        <td class="ylwbg"> 150</td>
                        <td class="ylwbg"> 6,997.20</td>
                        <td class="ylwbg"> 7,016.20</td>
                        <td class="ylwbg"> 75</td>
                        <td class="grybg"><a href="/live_market/dynaContent/live_watch/option_chain/optionDates.jsp?symbol=NIFTY&instrument=OPTIDX&strike=3500.00"><b>3500.00</b></a></td>
                        <td class="nobg">-</td>
                        <td class="nobg">-</td>
                        <td class="nobg"> 1.10</td>
                        <td class="nobg"> 7,500</td>

                        <!--*Net Change*-->

                            <td class="nobg" Style="color:Green;"> 0.65</td>


                        <td class="nobg">

                            <!-- <a href="javascript:popup1('NIFTY','OPTIDX','28DEC2017','3500.00','PE')"> 0.75</a> -->

                            <a href="/live_market/dynaContent/live_watch/get_quote/GetQuoteFO.jsp?underlying=NIFTY&instrument=OPTIDX&strike=3500.00&type=PE&expiry=28DEC2017" target="_blank"> 0.75</a>



                        </td>
                        <!-- Added By Swapnil -->
                        <td class="nobg"> 283.24</td>
                        <!-- End Added By Swapnil -->
                        <td class="nobg"> 79</td>
                        <td class="nobg"> 375</td>

                        <td class="nobg"> 13,425</td>

                        <td><a href="javascript:chartPopup('NIFTY', 'OPTIDX', '28DEC2017', '3500.00','PE','NIFTY 50');"><img src="/live_market/resources/images/grficon.gif" alt="Graph"/></a></td>

<!--<td><a href="javascript:popup1('','','1')">Quote</a></td>
<td><a href="javascript:popup1('','','','','PE')"><img src="/images/print3.gif"></a></td>-->

                    </tr>


                    <tr>

请帮我修正第二个for循环，以便我可以获取正确的数据。

感谢。

1 个答案:

答案 0 :(得分:0)

您可以尝试查找元素中的所有链接，并获取它们的href值：
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;


/**
 * Scrapes the NSE NIFTY option-chain page with jsoup and prints, for every
 * table header and data cell, a piece of metadata plus the visible text.
 */
public class WebscrappingCls {

    /** Creates an instance; all work is done by the static {@link #main} entry point. */
    public WebscrappingCls() {
        super();
    }

    /**
     * Fetches the option-chain page and writes its contents to standard output:
     * the page title, then each {@code <th>} (its {@code title} attribute and text),
     * then each {@code <td>} (the {@code href} of the first link inside it and its text).
     *
     * @param args command-line arguments; not read
     * @throws IOException declared for compatibility with existing callers; fetch
     *         failures are caught and reported on standard error inside this method
     */
    public static void main(String[] args) throws IOException {

        String baseURL = "https://www.nseindia.com/live_market/dynaContent/live_watch/option_chain/optionKeys.jsp?segmentLink=17&instrument=OPTIDX&symbol=NIFTY&date=25JAN2018";
        try {
            Document doc = Jsoup.connect(baseURL).get();
            System.out.println(doc.title());

            for (Element header : doc.select("th")) {
                // attr() takes an attribute name, not a tag name: asking for "th"
                // always produced an empty string. The header cells describe
                // themselves through their "title" attribute, so print that.
                System.out.println("\ntitle : " + header.attr("title"));
                System.out.println("text : " + header.text());
            }

            for (Element cell : doc.select("td")) {
                // <- here: take the href of the first <a> nested in the cell;
                // cells without a link yield an empty string.
                System.out.println("link " + cell.select("a").attr("href"));
                System.out.println("text : " + cell.text());
            }
        } catch (IOException e) {
            // Say which request failed before dumping the trace.
            System.err.println("Failed to fetch " + baseURL);
            e.printStackTrace();
        }

    }
}