我刚开始接触 jsoup 和 HTML。我想知道如何从 Google 新闻首页的报道中提取标题和链接(如果可能的话)。这是我的代码:
$(function(){
/**
 * Create an element in the SVG namespace and set the given attributes on it.
 *
 * @param {string} tag - SVG tag name, e.g. 'path'.
 * @param {Object} [attrs] - Map of attribute name to value. Only the object's
 *     own enumerable properties are applied; may be omitted or null.
 * @returns {Element} The newly created element (not attached to any document).
 */
function makeSVG(tag, attrs){
  var el = document.createElementNS('http://www.w3.org/2000/svg', tag), k;
  for(k in attrs){
    // Skip keys inherited through the prototype chain so that anything added
    // to a prototype (e.g. by another script) never ends up as an attribute.
    if(Object.prototype.hasOwnProperty.call(attrs, k)){
      el.setAttribute(k, attrs[k]);
    }
  }
  return el;
}
// Shared timeline that setAnimations() fills with one tween per triangle.
// It is created paused, so nothing plays until it is driven explicitly.
var trianglesTimeline = new TimelineMax({paused: true, smoothChildTiming: true});
/**
 * Create a row of small triangle <path> elements, append them to the page's
 * <svg>, and schedule a staggered fade/slide-in tween for each of them on
 * trianglesTimeline. Finally the timeline is played up to 10% of its duration.
 *
 * The number of triangles is (window width - 60) / 9, truncated to an integer.
 * Even-indexed triangles start translated by +40 and use one path shape;
 * odd-indexed ones start at -40 and use the mirrored shape.
 */
function setAnimations(){
  var nbTriangles = ($(window).width() - 60)/9 | 0,
      $svg = $('svg'), // looked up once instead of once per triangle
      triangles = [],
      i, isEven, opacity, path;

  for(i = 0; i < nbTriangles; i++){
    // Random target opacity in [0.5, 2.5], rounded to one decimal (string).
    opacity = (Math.random()*(2.5-0.5) + 0.5).toFixed(1);
    isEven = i%2 === 0;
    path = makeSVG('path', {
      fill: '#000',
      d: isEven ? 'M0 0H2 L1 1Z' : 'M0 1H2 L1 0Z',
      transform: 'translate('+i+', '+(isEven ? 40 : -40)+')',
      'data-op': opacity,
      style: 'opacity:0'
    });
    $svg.append(path);
    // Remember exactly which element was created, so the tween below targets
    // it directly instead of re-querying every <path> in the document (which
    // was quadratic and could pick up unrelated, pre-existing paths).
    triangles.push({el: path, opacity: parseFloat(opacity)});
  }

  for(i = 0; i < triangles.length; i++){
    // Every tween is inserted at position 0; the stagger comes from `delay`.
    trianglesTimeline.to(triangles[i].el, 0.3, {opacity: triangles[i].opacity, y: '0px', delay: 0.04*i}, 0);
  }

  trianglesTimeline.tweenTo( trianglesTimeline.duration() * 0.1 );
}
setAnimations();
});
出于某种原因,我认为我的程序无法找到 titletext(代码如下):
org.jsoup.nodes.Document doc = null;
// Fetch and parse the Google News front page. If the request fails, the stack
// trace is printed and doc keeps the null it was initialised with.
try {
doc = (org.jsoup.nodes.Document) Jsoup.connect("https://news.google.com/").get();
} catch (IOException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
// NOTE(review): in jsoup selector syntax a bare word is a tag name, so this
// looks for <titletext> elements; a CSS class would be written ".titletext".
// NOTE(review): doc is still null here if the request above threw.
Elements titles = doc.select("titletext");
System.out.println("Titles: " + titles.text());
// non-existent: presumably nothing is printed by this loop because the
// selection is empty - confirm against the reported output.
for (org.jsoup.nodes.Element e: titles) {
System.out.println("Title: " + e.text());
System.out.println("Link: " + e.attr("href"));
}
,因为这是代码运行时的输出(没有找到任何 titletext 元素):
Titles:
非常感谢你的帮助,谢谢。
答案 0 :(得分:0)
首先获取所有以 h2 HTML 标签开头的节点/元素:
Elements elem = html.select("h2");
现在你已经得到了这些元素,每个元素都带有一些子元素和属性(id、href、originalhref 等)。接下来从中取出你需要的数据:
for(Element e: elem){
System.out.println(e.select("[class=titletext]").text());
System.out.println(e.select("a").attr("href"));
}