JSoup:提取文本并将其存储在多个变量中

时间:2013-12-25 22:43:37

标签: java android html jsoup

我有以下情况:

我想提取“span”标签之间的所有文字

<div class="examplediv">
<ul>
<li><span>1</span></li>
<li><span>2</span></li>
<li><span>3</span></li>
<li><span>4</span></li>                                     
</ul>
</div>

<div class="examplediv">
<ul>
<li><span>5</span></li>
<li><span>6</span></li>
<li><span>7</span></li>
<li><span>8</span></li>                                     
</ul>
</div>

<div class="examplediv">
<ul>
<li><span>9</span></li>
<li><span>10</span></li>
<li><span>11</span></li>
<li><span>12</span></li>                                     
</ul>
</div>

<div class="examplediv">
<ul>
<li><span>13</span></li>
<li><span>14</span></li>
<li><span>15</span></li>
<li><span>16</span></li>                                     
</ul>
</div>

所有第一行将存储在变量“A”中,所有第二行都存储在“B”中,所有第三行都存储在“C”中,依此类推。

我的Java代码如下:

// Load the HTML file, decoding it as UTF-8; "http://example.com/" is passed as the base URI.
File input = new File("/tmp/input.html");
Document doc = Jsoup.parse(input, "UTF-8", "http://example.com/");
// Select every <span> nested under an element with class "examplediv".
// The result is one flat list across all four divs.
Elements exa = doc.select(".examplediv span");
if (exa != null) {
// Copy each matched span's text into A at the same index, so A receives
// all sixteen values in sequence instead of one value per div.
for (int i = 0; i < exa.size(); i++) {
A[i] = exa.get(i).text();
      }
}

此代码将所有行放在变量“A”中。

A = 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16

我希望它像这样存储:

A = 1,5,9,13
B = 2,6,10,14
C = 3,7,11,15
D = 4,8,12,16

请就如何做到这一点给我一些建议。谢谢,节日快乐。

编辑---------------------------------

我有这个想法:

if (exa != null) {
for (int i = 0; i < exa.size(); i++) {
// NOTE: the indices 0..3 are constants that do not depend on i, so every
// iteration reads the same first four spans of the flat selection; the
// spans belonging to the later divs are never read.
A[i] = exa.get(0).text();
B[i] = exa.get(1).text();
C[i] = exa.get(2).text();
D[i] = exa.get(3).text();
      }
}

它给了我

A = 1,2,3,4
B = 1,2,3,4
C = 1,2,3,4
D = 1,2,3,4

重复了前 4 个 span 的内容。

我怎么能至少得到:

A = 1,2,3,4
B = 5,6,7,8
C = 9,10,11,12
D = 13,14,15,16

1 个答案:

答案 0 :(得分:0)

使用字符串

    /**
     * Parses the sample HTML and prints four comma-separated lines: A holds the first
     * span of every list, B the second, C the third and D the fourth.
     *
     * <p>Lists that contain fewer than four spans are skipped so the four columns stay
     * aligned. If no list qualifies, four empty lines are printed.
     *
     * @param args unused
     * @throws IOException not thrown by this body; declared only to keep the signature unchanged
     */
    public static void main(String... args) throws IOException {
        String text = "<div class=\"examplediv\"> <ul> <li><span>1</span></li> <li><span>2</span></li> <li><span>3</span></li> <li><span>4</span></li> </ul> </div> <div class=\"examplediv\"> <ul> <li><span>5</span></li> <li><span>6</span></li> <li><span>7</span></li> <li><span>8</span></li> </ul> </div> <div class=\"examplediv\"> <ul> <li><span>9</span></li> <li><span>10</span></li> <li><span>11</span></li> <li><span>12</span></li> </ul> </div> <div class=\"examplediv\"> <ul> <li><span>13</span></li> <li><span>14</span></li> <li><span>15</span></li> <li><span>16</span></li> </ul> </div>";
        String A = "", B = "", C = "", D = "";

        // Jsoup.parse(String, String) takes a base URI as its second argument, not a
        // charset. The markup is already a decoded String, so parse it directly.
        Document page = Jsoup.parse(text);

        // Tracks whether a separator is needed; avoids trimming a trailing comma
        // afterwards, which fails when nothing was appended.
        boolean first = true;
        for (Element ul : page.select(".examplediv ul")) {
            Elements spans = ul.select("span");
            if (spans.size() < 4) {
                // get(3) would throw on a short list; skip it to keep columns aligned.
                continue;
            }
            String sep = first ? "" : ",";
            A += sep + spans.get(0).text();
            B += sep + spans.get(1).text();
            C += sep + spans.get(2).text();
            D += sep + spans.get(3).text();
            first = false;
        }

        System.out.println(A);
        System.out.println(B);
        System.out.println(C);
        System.out.println(D);
    }

使用StringBuilder

    /**
     * Parses the sample HTML and prints four comma-separated lines built with
     * {@link StringBuilder}: A holds the first span of every list, B the second,
     * C the third and D the fourth.
     *
     * <p>Lists that contain fewer than four spans are skipped so the four columns stay
     * aligned. If no list qualifies, four empty lines are printed.
     *
     * @param args unused
     * @throws IOException not thrown by this body; declared only to keep the signature unchanged
     */
    public static void main(String... args) throws IOException {
        String text = "<div class=\"examplediv\"> <ul> <li><span>1</span></li> <li><span>2</span></li> <li><span>3</span></li> <li><span>4</span></li> </ul> </div> <div class=\"examplediv\"> <ul> <li><span>5</span></li> <li><span>6</span></li> <li><span>7</span></li> <li><span>8</span></li> </ul> </div> <div class=\"examplediv\"> <ul> <li><span>9</span></li> <li><span>10</span></li> <li><span>11</span></li> <li><span>12</span></li> </ul> </div> <div class=\"examplediv\"> <ul> <li><span>13</span></li> <li><span>14</span></li> <li><span>15</span></li> <li><span>16</span></li> </ul> </div>";
        StringBuilder A = new StringBuilder(), B = new StringBuilder(), C = new StringBuilder(), D = new StringBuilder();

        // Jsoup.parse(String, String) takes a base URI as its second argument, not a
        // charset. The markup is already a decoded String, so parse it directly.
        Document page = Jsoup.parse(text);

        // Tracks whether a separator is needed; avoids deleteCharAt(length - 1)
        // afterwards, which throws when the builder is still empty.
        boolean first = true;
        for (Element ul : page.select(".examplediv ul")) {
            Elements spans = ul.select("span");
            if (spans.size() < 4) {
                // get(3) would throw on a short list; skip it to keep columns aligned.
                continue;
            }
            if (!first) {
                A.append(',');
                B.append(',');
                C.append(',');
                D.append(',');
            }
            // Append directly instead of concatenating a temporary String first.
            A.append(spans.get(0).text());
            B.append(spans.get(1).text());
            C.append(spans.get(2).text());
            D.append(spans.get(3).text());
            first = false;
        }

        System.out.println(A);
        System.out.println(B);
        System.out.println(C);
        System.out.println(D);
    }

修改

出现这种行为,是因为你使用了 = 而不是 +=,所以每次都覆盖了之前的值。也许下面的代码才是你想要的。

/**
 * Parses the sample HTML and collects the span texts column-wise into four lists:
 * A holds the first span of every list, B the second, C the third and D the fourth.
 * Each list is then printed on its own line with a space after every value.
 *
 * <p>Lists that contain fewer than four spans are skipped so the four columns stay
 * aligned.
 *
 * @param args unused
 * @throws IOException not thrown by this body; declared only to keep the signature unchanged
 */
public static void main(String... args) throws IOException {
    String text = "<div class=\"examplediv\"> <ul> <li><span>1</span></li> <li><span>2</span></li> <li><span>3</span></li> <li><span>4</span></li> </ul> </div> <div class=\"examplediv\"> <ul> <li><span>5</span></li> <li><span>6</span></li> <li><span>7</span></li> <li><span>8</span></li> </ul> </div> <div class=\"examplediv\"> <ul> <li><span>9</span></li> <li><span>10</span></li> <li><span>11</span></li> <li><span>12</span></li> </ul> </div> <div class=\"examplediv\"> <ul> <li><span>13</span></li> <li><span>14</span></li> <li><span>15</span></li> <li><span>16</span></li> </ul> </div>";
    ArrayList<String> A = new ArrayList<String>(), B = new ArrayList<String>(), C = new ArrayList<String>(), D = new ArrayList<String>();

    // Jsoup.parse(String, String) takes a base URI as its second argument, not a
    // charset. The markup is already a decoded String, so parse it directly.
    Document page = Jsoup.parse(text);

    for (Element ul : page.select(".examplediv ul")) {
        Elements spans = ul.select("span");
        if (spans.size() < 4) {
            // get(3) would throw on a short list; skip it to keep columns aligned.
            continue;
        }
        A.add(spans.get(0).text());
        B.add(spans.get(1).text());
        C.add(spans.get(2).text());
        D.add(spans.get(3).text());
    }

    printRow(A);
    printRow(B);
    printRow(C);
    printRow(D);
}

/** Prints every value of {@code row} followed by a space, then ends the line. */
private static void printRow(Iterable<String> row) {
    for (String value : row) {
        System.out.print(value + " ");
    }
    System.out.println("");
}