我使用网页抓取模块编写了一个python脚本,将Youtube视频下载为mp3文件
import requests,os,re
import bs4
homedir=os.path.expanduser('~')+'\\Music\\'
regex = re.compile('[^a-zA-Z]')
def getDetailed(url):
videoUrl = {'video': url}
responce = requests.get('http://www.youtubeinmp3.com/download/', params=videoUrl)
soup = bs4.BeautifulSoup(responce.content, 'lxml')
videoTitle = soup.find('span', id='videoTitle').text
videoTitle=regex.sub('',videoTitle)
try:
videoLink = soup.find('a', id='download')['href']
vidList.append([videoTitle,rawUrl+videoLink])
except:
print(videoTitle+' could not be downloaded')
def getTheList(videoListUrl):
global folderName
url='https://www.youtube.com'
soup=bs4.BeautifulSoup(requests.get(videoListUrl).content,'lxml')
folderName=soup.title.text
links = soup.select('a.pl-video-title-link.yt-uix-tile-link.yt-uix-sessionlink.spf-link')
print(len(links),'videos found')
for link in links:
getDetailed(url+link['href'])
def download():
directory = homedir+folderName
if not os.path.exists(directory):
print('creating diretory')
os.makedirs(directory)
print(directory)
i =1
for name,link in vidList:
if os.path.exists(directory+'\\'+name+'.mp3'):
print('skiping '+name+' songs remaining:',len(vidList)-i)
i=i+1
else:
video = requests.get(link)
with open(directory+'\\'+name+ '.mp3', 'wb') as writefile:
writefile.write(video.content)
print('video downloaded, check it out. Remaining videos:',len(vidList)-i)
i=i+1
vidList=[]
folderName=''
rawUrl='http://www.youtubeinmp3.com'
#asking for url,for testing I am keeping a list a default
videoListUrl=str(input('enter the playlist link'))
getTheList(videoListUrl)
print(len(vidList),'vides are ready to download')
download()
此代码会根据this网站下载youtube播放列表中的所有视频。 它工作正常,但有些损坏的错误,即如果列表中有10首歌曲,其中至少有2首会被损坏的文件。
所以,我想知道为什么有些人没有正确下载。我的代码是错的,还是会发生什么?