#!/usr/bin/env python
import requests
from bs4 import BeautifulSoup
# Channel "videos" page whose links are to be listed.
url = "https://www.youtube.com/channel/UCaKt8dvEIPnEHWSbLYhzrxg/videos"
response = requests.get(url)
# Parse the downloaded HTML, then convert the parsed document back to a plain
# string so it can be scanned with str.find(). No parser is named in the
# BeautifulSoup call here.
page = BeautifulSoup(markup=response.content)
page = str(page)
def getURL(page):
    """
    Find the first double-quoted value that follows an "a href" marker.

    :param page: html of a web page, as a plain string
    :return: ``(url, end_quote)`` where ``url`` is the text between the first
        pair of double quotes after the marker and ``end_quote`` is the index
        of the closing quote, so the caller can continue the search with
        ``page[end_quote:]``. Returns ``(None, 0)`` when the marker is not
        found or when the quotes around its value are missing.
    """
    start_link = page.find("a href")
    if start_link == -1:
        return None, 0
    start_quote = page.find('"', start_link)
    if start_quote == -1:
        # No opening quote after the marker: without this guard the next
        # search would restart from index 0 and pick up unrelated text.
        return None, 0
    end_quote = page.find('"', start_quote + 1)
    if end_quote == -1:
        # Unterminated value: slicing up to -1 would silently drop the last
        # character and hand the caller a negative offset.
        return None, 0
    url = page[start_quote + 1:end_quote]
    return url, end_quote
# Print every link in turn: take the next link, drop the part of the page
# that has already been scanned, and stop at the first falsy url.
url, n = getURL(page)
page = page[n:]
while url:
    print(url)
    url, n = getURL(page)
    page = page[n:]
我正在使用上面的代码来获取网页上所有 YouTube 视频的列表。但运行这段代码时,我收到以下错误:
The code that caused this warning is on line 9 of the file C:/Users/PycharmProjects/ReadCSVFile/venv/Links.py. To get rid of this warning, change code that looks like this:
我做了并开始使用html,但是出现了一些不同的错误。
我正在使用Python 3.0。我正在使用IDE Pycharm。
有人可以帮我吗
答案 0 :(得分:0)
这不是错误,而是一条警告,提示您没有指定解析器。解析器可以是 'html.parser'、'lxml' 或 'xml'。请改为:
page = BeautifulSoup(response.content, 'html.parser')
您上面的代码实际上并没有用到 BeautifulSoup 的任何功能,下面是一个使用它的示例。
#!/usr/bin/env python
import requests
from bs4 import BeautifulSoup
def getURL(url):
    """
    Collect the link targets found on a web page.

    :param url: url of web page
    :return: list with the ``href`` value of every ``<a>`` tag on the page
        that has an ``href`` attribute
    """
    response = requests.get(url)
    # parse html
    page = BeautifulSoup(response.content, 'html.parser')
    # href=True keeps only anchors that actually carry an href attribute.
    # Without it, .get('href') yields None for bare <a> tags and a caller
    # that joins the result into one string fails with TypeError.
    link_tags = page.find_all('a', href=True)
    urls = [tag.get('href') for tag in link_tags]
    return urls
url = "https://www.youtube.com/channel/UCaKt8dvEIPnEHWSbLYhzrxg/videos"
all_url = getURL(url)
# str.join accepts only strings, so skip any None entries (anchors that had
# no href attribute) instead of failing with TypeError.
print('\n'.join(link for link in all_url if link is not None))