我想使用 BeautifulSoup 获取页面中的所有链接,并将它们与 example.com 这样的域名进行比较。有人能在这方面提供帮助吗?
答案 0 :(得分:0)
我找到了解决方案,下面是对应的 Python 代码:
import urllib.request as Ureq
import requests
from bs4 import BeautifulSoup as bs
from urllib.parse import urlparse
def _is_same_host_link(href, page_host):
    """Return True when *href* targets *page_host* or is a relative link."""
    try:
        target = urlparse(href.strip())
        target_host = target.hostname
    except ValueError:
        # urlparse rejects some malformed addresses (e.g. a broken IPv6
        # literal); such a link cannot be attributed to any host.
        return False
    # mailto:, javascript:, tel: and similar are not links to a web page.
    if target.scheme not in ("", "http", "https"):
        return False
    # No netloc means the link is relative, so it resolves against the page
    # itself and therefore stays on the same host.
    if not target.netloc:
        return True
    return target_host == page_host


def task(url):
    """Print the size of the page at *url* and its links to the same domain.

    The page is downloaded once. Its size in bytes and the
    ``scheme://netloc/`` prefix of *url* are printed, then every ``<a href>``
    value that points to the same host as *url* is printed, followed by the
    number of such links.

    A link counts as same-domain when it is relative (has no host of its own)
    or when its host equals the host of *url*. Links with a scheme other than
    http/https are ignored.

    Args:
        url: Absolute URL of the page to inspect.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
    """
    # Single download: the same bytes are used for the size report and for
    # parsing, and the connection is closed as soon as the body is read.
    with Ureq.urlopen(url) as response:
        body = response.read()
        declared_charset = response.headers.get_content_charset()
    print("Total Size of the Web Page = ", len(body), " Bytes")

    url_p = urlparse(url)
    domain = '{uri.scheme}://{uri.netloc}/'.format(uri=url_p)
    print(domain)

    # BeautifulSoup decodes the raw bytes itself; the charset from the
    # Content-Type header (if any) is passed along as a hint.
    soup = bs(body, 'html.parser', from_encoding=declared_charset)

    # Compare parsed hostnames instead of searching for the domain string
    # inside the href: a substring test misses relative links and counts
    # external links that merely mention the domain in their path or query.
    page_host = url_p.hostname
    same_domain_count = 0
    for anchor in soup.find_all('a', href=True):
        href = anchor['href']
        if _is_same_host_link(href, page_host):
            print(href)
            same_domain_count += 1
    print("Total links pointing to same domain = ", same_domain_count)
if __name__ == "__main__":
    # Only prompt when run as a script, so importing this module does not
    # block on user input.
    # Surrounding whitespace is stripped so a stray space in the typed
    # address does not end up in the requested URL.
    URL_User = input('Enter URl : ').strip()
    task(URL_User)