如何使用Jsoup for android

时间:2017-04-05 16:47:17

标签: android html css

如何在 Android 中使用 jsoup:

从以下html代码中获取属性和值
<div style='width:100px;height:100px;background:#d8a8d8'>&nbsp;</div> Actual Text <br>Text Inside br<br>
<div style='width:100px;height:100px;background:#dda0dd'>&nbsp;</div> This text Too.

我需要的是:

1。两个div标签的背景值 - (即 - “ d8a8d8 ”和“ dda0dd ”)

2。 div之后的文本 - (即 - “ Actual Text ”和“ This text Too. ”)

3。 br标签后的文本,位于第一个div之后(即“ Text Inside br ”)

那么,我该怎么做?

我尝试的是:

  // The HTML fragment to parse. A Java string literal cannot span source lines,
  // so the two lines are joined with an explicit "\n" and concatenation.
  String st = "<div style='width:100px;height:100px;background:#d8a8d8'>&nbsp;</div> Actual Text <br>Text Inside br<br>\n"
      + "<div style='width:100px;height:100px;background:#dda0dd'>&nbsp;</div>";

  // Parse the fragment (the variable is named st, not s) and select every div.
  Document doc = Jsoup.parse(st);
  Elements divs = doc.select("div");

  for (Element elem : divs) {
    // Inner HTML of the div.
    System.out.println(elem.html());

    // The complete style attribute; this is why every style value shows up,
    // not just the background colour.
    String ss = elem.attr("style");
    Log.d("logjsoup", "\n after Jsoup: " + ss);
  }

我在div的样式中得到了所有的值。

如何获得我想要的具体结果?

1 个答案:

答案 0 :(得分:0)

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Small jsoup demo that, for every {@code <div>} in an HTML fragment, prints:
 * <ol>
 *   <li>the hex background colour declared in the div's {@code style} attribute,</li>
 *   <li>the node that directly follows the div (trimmed), and</li>
 *   <li>the node that follows a {@code <br>} when that {@code <br>} is the div's
 *       next element sibling.</li>
 * </ol>
 * Each div's output is terminated by a separator line.
 */
public class JsoupTest {

    /**
     * Captures the hex digits after {@code background:#} (or {@code background-color:#}).
     * Matching only hex digits keeps the result from swallowing later style
     * declarations or other attributes, which a greedy {@code .*} would do.
     */
    private static final Pattern BACKGROUND_COLOR =
            Pattern.compile("background(?:-color)?\\s*:\\s*#([0-9a-fA-F]{3,8})");

    /**
     * Parses the sample HTML and prints the extracted values to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String html = "<div style='width:100px;height:100px;background:#d8a8d8'>&nbsp;</div> Actual Text <br>Text Inside br<br> \n"+
                    "<div style='width:100px;height:100px;background:#dda0dd'>&nbsp;</div> This text Too.";
        // parse the html
        Document doc = Jsoup.parse(html);
        // select the divs
        Elements divs = doc.select("div");

        for (Element div : divs) {
            // Match against the style value itself rather than the serialized
            // attribute list, so quoting and other attributes cannot interfere.
            Matcher m = BACKGROUND_COLOR.matcher(div.attr("style"));
            while (m.find()) {
                // background value, without the leading '#'
                System.out.println(m.group(1));
            }

            // Node right after the div; absent when the div is the last child.
            if (div.nextSibling() != null) {
                System.out.println(div.nextSibling().toString().trim());
            }

            // Text after the first <br>: only meaningful when the next element
            // really is a <br> and something follows it.
            Element next = div.nextElementSibling();
            if (next != null && "br".equals(next.tagName()) && next.nextSibling() != null) {
                System.out.println(next.nextSibling().toString());
            }
            System.out.println("-----------------------");
        }
    }
}