如何使用jsoup与android:
从以下html代码中获取属性和值<div style='width:100px;height:100px;background:#d8a8d8'> </div> Actual Text <br>Text Inside br<br>
<div style='width:100px;height:100px;background:#dda0dd'> </div> This text Too.
我需要的是:
1。两个div标签的背景值 - (即 - “ d8a8d8 ”和“ dda0dd ”)
2。 div之后的文本 - (即 - “ Actual Text ”和“ This text Too. ”)
3。 br标签后的文本,位于第一个div之后(即“ Text Inside br ”)
那么,我该怎么做?
我尝试的是:
// HTML fragment under test. Java string literals cannot span lines, so the two
// halves are concatenated with an explicit newline between them.
String st = "<div style='width:100px;height:100px;background:#d8a8d8'> </div> Actual Text <br>Text Inside br<br>\n"
        + "<div style='width:100px;height:100px;background:#dda0dd'> </div>";
// Parse the fragment declared above (the variable is named st, not s).
Document doc = Jsoup.parse(st);
// Select every div element in the parsed document.
Elements divs = doc.select("div");
for (Element elem : divs) {
    System.out.println(elem.html()); // inner HTML of the div
    // The whole style attribute value, i.e. every declaration, not just the background.
    String ss = elem.attr("style");
    Log.d("logjsoup", "\n after Jsoup: " + ss);
}
我在div的样式中得到了所有的值。
如何获得我想要的具体结果?
答案 0 :(得分:0)
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Demonstrates extracting style values and trailing text from an HTML fragment with jsoup.
 *
 * <p>For every {@code div} in the fragment, {@link #main(String[])} prints:
 * <ol>
 *   <li>the hex digits of the background colour declared in the div's {@code style} attribute,</li>
 *   <li>the node that directly follows the div (trimmed), and</li>
 *   <li>when the div has a following element sibling, the node directly after that element.</li>
 * </ol>
 * Each div's output is terminated by a separator line.
 */
public class JsoupTest {

    /**
     * Matches a {@code background} or {@code background-color} declaration whose value is a
     * hex colour; group 1 holds the hex digits without the leading {@code #}.
     *
     * <p>The digits are matched explicitly instead of using a greedy {@code .*}, so the match
     * stops at the end of the colour even when other declarations or attributes follow it.
     * Compiled once because the pattern never changes.
     */
    private static final Pattern BACKGROUND_HEX =
            Pattern.compile("background(?:-color)?\\s*:\\s*#([0-9a-fA-F]{3,8})");

    /**
     * Parses the sample fragment and prints the extracted values to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String html = "<div style='width:100px;height:100px;background:#d8a8d8'> </div> Actual Text <br>Text Inside br<br> \n"
                + "<div style='width:100px;height:100px;background:#dda0dd'> </div> This text Too.";

        // Parse the html and select the divs.
        Document doc = Jsoup.parse(html);
        Elements divs = doc.select("div");

        for (Element e : divs) {
            // Search only the style attribute's value rather than the serialized form of
            // all attributes, so unrelated attributes cannot leak into the match.
            Matcher m = BACKGROUND_HEX.matcher(e.attr("style"));
            while (m.find()) {
                // background value
                System.out.println(m.group(1));
            }

            // Text after the div is the div's next sibling node; a div that is the last
            // node has none, so guard against null instead of throwing.
            if (e.nextSibling() != null) {
                System.out.println(e.nextSibling().toString().trim());
            }

            // In the sample, the element following the first div is the first <br>; the node
            // after that <br> is the text that follows it. Either may be absent.
            Element following = e.nextElementSibling();
            if (following != null && following.nextSibling() != null) {
                System.out.println(following.nextSibling().toString());
            }

            System.out.println("-----------------------");
        }
    }
}