我有这段HTML:
<li class="hidden-xs-inline">
<a class='page-link' href="/acne-scars-treatment/answers?page=226" data-page="226">
</a>
</li>
如何使用 Java 和 Jsoup 获取数字 226?
我使用了这段代码:
// Fetch the page at topic_link and parse it into a Jsoup Document.
Document doc=Jsoup.connect(topic_link).get();
// Select every a.page-link that is a direct child of an li.hidden-xs-inline.
Elements elements = doc.select("li.hidden-xs-inline > a.page-link");
// text() yields the text inside the matched elements; in the HTML shown above the <a> has no
// text content (226 appears only in the href and data-page attributes), so this is not the number.
return elements.text();
但它并没有返回我想要的数字!
有什么建议吗?
答案 0 :(得分:0)
您需要的数据不在元素的文本内容中,而是在 data-page 属性里,因此 text() 不起作用。请尝试:
// Read the data-page attribute (the value "226" in the HTML above) instead of the element text.
elements.attr("data-page");
欲了解更多信息,请查看: https://jsoup.org/apidocs/org/jsoup/select/Elements.html
答案 1 :(得分:0)
请参见下面的完整示例,它从链接的 href 查询参数中解析出 page 的值:
import org.apache.http.NameValuePair;
import org.apache.http.client.utils.URLEncodedUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.util.List;
/**
add following dependencies to pom.xml
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.2</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.3</version>
</dependency>
*/
public class JsoupExample {
    /**
     * Parses a hard-coded pagination snippet, takes the {@code href} of the first
     * {@code page-link} element found within the first {@code hidden-xs-inline} element,
     * and prints each query parameter of that link as {@code name : value}.
     *
     * @param args command-line arguments (unused)
     * @throws URISyntaxException if the extracted href cannot be parsed as a URI
     */
    public static void main(String[] args) throws URISyntaxException {
        // Same markup as in the question, assembled line by line.
        final String markup = String.join("\n",
                "<li class=\"hidden-xs-inline\">",
                " <a class='page-link' href=\"/acne-scars-treatment/answers?page=226\" data-page=\"226\">",
                "</a>",
                "</li>");

        // Narrow down in two steps: first the list item, then the link inside it.
        final String linkTarget = Jsoup.parse(markup)
                .body()
                .getElementsByClass("hidden-xs-inline")
                .get(0)
                .getElementsByClass("page-link")
                .get(0)
                .attr("href");

        // Let HttpClient split the query string into name/value pairs.
        final URI linkUri = new URI(linkTarget);
        final List<NameValuePair> queryPairs =
                URLEncodedUtils.parse(linkUri, Charset.forName("UTF-8"));

        for (int i = 0; i < queryPairs.size(); i++) {
            final NameValuePair pair = queryPairs.get(i);
            System.out.println(pair.getName() + " : " + pair.getValue()); // page : 226
        }
    }
}