import re
from urllib.parse import quote_plus
from urllib.request import urlopen

from bs4 import BeautifulSoup
# Look up a person on Google News and print the headline of every article
# found on the results page.

SEARCH_URL_TEMPLATE = (
    "https://news.google.co.in/news/section?cf=all&hl=en&pz=1&ned=in&q={query}"
    "&topicsid=FRONTPAGE&ict=tnv3"
)


def build_search_url(first_name, last_name):
    """Return the Google News search URL for a first and last name.

    Each name is percent-encoded before being placed in the query string, so
    spaces, ``&``, ``=`` or non-ASCII characters typed by the user cannot
    break the URL or inject extra query parameters. The two names are joined
    with ``+`` exactly as the un-encoded concatenation did for plain names.
    """
    query = quote_plus(first_name) + "+" + quote_plus(last_name)
    return SEARCH_URL_TEMPLATE.format(query=query)


def main():
    """Prompt for a name, fetch the results page and print each headline."""
    # input() already returns str, so no conversion is needed.
    a = input("first name")
    p = input("last name")
    t = build_search_url(a, p)
    print(t)
    # Close the HTTP response as soon as the document has been parsed.
    with urlopen(t) as html:
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        bsObj = BeautifulSoup(html, "html.parser")
    for name in bsObj.find_all("span", {"class": "titletext"}):
        print(name.get_text())


if __name__ == "__main__":
    main()
在这段代码中,我只获取到了标题文本,但我还想同时获取对应的文章链接。因此我需要找到一个同时包含 href 和 titletext 的标签——请问如何才能同时获取 href 和标题文本?
答案 0 :(得分:3)
span 标签的父元素(a 标签)具有 href 属性。可以这样获取该属性的值:
...
# `t` is the search URL built in the elided part of the script above.
# Close the HTTP response as soon as the document has been parsed.
with urlopen(t) as html:
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    bsObj = BeautifulSoup(html, "html.parser")
nameList = bsObj.find_all("span", {"class": "titletext"})
for name in nameList:
    # The headline <span> is nested inside the <a> element that carries the
    # article link, so the href is read from the span's parent. `.get`
    # yields None instead of raising when the parent has no href attribute.
    print(name.get_text(), name.parent.get('href'))  # <----