我无法用脚本自动下载 YouTube 视频。代码如下,问题出在最后一部分:download = urllib.request.urlopen(download_url).read()
# Youtube video download script
# 10n1z3d[at]w[dot]cn
import urllib.request
import sys
# Interactive/CLI script: download one YouTube video as "<title>.mp4" into
# the current working directory.
# NOTE(review): the traceback reported for this script shows the get_video
# endpoint answering "403 Forbidden"; the fixes below address the local bugs
# only and cannot make the remote endpoint accept the request.
print("\n--------------------------")
print (" Youtube Video Downloader")
print ("--------------------------\n")

# The URL comes from the first command-line argument, or from a prompt.
if len(sys.argv) > 1:
    video_url = sys.argv[1]
else:
    video_url = input('[+] Enter video URL: ')

print("[+] Connecting...")
try:
    # The id is whatever follows "watch?v=" up to the next "&", so any
    # trailing parameter (&feature=related, &feature=dir, ...) is dropped
    # without having to list each one.
    video_id = video_url.split('www.youtube.com/watch?v=')[1].split('&')[0]
except IndexError:
    print("[-] Invalid URL.")
    sys.exit(1)

print("[+] Parsing token...")
try:
    # Decode the body: str() on a bytes object returns its "b'...'" repr
    # (with escape sequences) rather than the text itself.
    info = urllib.request.urlopen(
        'http://www.youtube.com/get_video_info?&video_id=' + video_id
    ).read().decode('utf-8', 'replace')
    token_value = info.split('video_id='+video_id+'&token=')[1].split('&thumbnail_url')[0]
except (urllib.error.URLError, IndexError) as err:
    # IndexError means the expected "video_id=...&token=" marker was absent.
    print("[-] Error parsing token. Quitting.")
    print("[-] " + str(err))
    sys.exit(1)
download_url = "http://www.youtube.com/get_video?video_id=" + video_id + "&t=" + token_value + "&fmt=18"

# Only prepend a scheme when the user did not type one, otherwise the URL
# would become "http://http://...".
page_url = video_url if '://' in video_url else 'http://' + video_url
try:
    v_url = urllib.request.urlopen(page_url).read().decode('utf-8', 'replace')
    # NOTE(review): these markers are what the original scraped; they look
    # like related-video fields, so the result may not be this video's title.
    video_title = v_url.split('"rv.2.title": "')[1].split('", "rv.4.rating"')[0]
except (urllib.error.URLError, ValueError, IndexError):
    # The title is cosmetic; fall back to the id instead of aborting.
    video_title = video_id

# The original replaced '"' with '"' and '&' with '&' (no-ops); presumably the
# HTML entities were lost when the script was copied, so decode them here.
video_title = video_title.replace('&quot;', '"').replace('&amp;', '&')

# The title comes from a web page: keep path separators and characters that
# are invalid in Windows file names out of the output file name.
filename = video_title
for bad_char in '\\/:*?"<>|':
    filename = filename.replace(bad_char, '-')
filename += '.mp4'

print("[+] Downloading " + '"' + video_title + '"...')
try:
    print(download_url)
    download = urllib.request.urlopen(download_url).read()
    # Write the payload in one call: iterating a bytes object yields ints,
    # which file.write() rejects. The payload is no longer echoed to the
    # console. The file is opened only after the download succeeded so a
    # failed request does not leave an empty .mp4 behind.
    with open(filename, 'wb') as file:
        file.write(download)
except (urllib.error.URLError, OSError) as err:
    print("[-] Error downloading. Quitting.")
    print("[-] " + str(err))
    sys.exit(1)
print("\n[+] Done. The video is saved to the current working directory(cwd).\n")
有一条错误消息:(感谢Wooble)
Traceback (most recent call last):
File "C:/Python31/MyLib/DrawingBoard/youtube_download-.py", line 52, in <module>
download = urllib.request.urlopen(download_url).read()
File "C:\Python31\lib\urllib\request.py", line 119, in urlopen
return _opener.open(url, data, timeout)
File "C:\Python31\lib\urllib\request.py", line 353, in open
response = meth(req, response)
File "C:\Python31\lib\urllib\request.py", line 465, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python31\lib\urllib\request.py", line 385, in error
result = self._call_chain(*args)
File "C:\Python31\lib\urllib\request.py", line 325, in _call_chain
result = func(*args)
File "C:\Python31\lib\urllib\request.py", line 560, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "C:\Python31\lib\urllib\request.py", line 353, in open
response = meth(req, response)
File "C:\Python31\lib\urllib\request.py", line 465, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python31\lib\urllib\request.py", line 391, in error
return self._call_chain(*args)
File "C:\Python31\lib\urllib\request.py", line 325, in _call_chain
result = func(*args)
File "C:\Python31\lib\urllib\request.py", line 473, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
答案 0 :(得分:18)
原始问题中的代码依赖于对 YouTube 页面和 URL 内容的若干假设(体现为 url.split('something=')[1] 这类写法),而这些假设未必总是成立。我测试过,结果甚至可能取决于页面上显示的相关视频。你很可能正好碰上了这类特殊情况。
下面是一个更干净的版本:它使用 urllib 解析网址和查询字符串,并能成功下载视频。为清晰起见,我删除了一些除了退出之外几乎什么都不做的 try/except。顺便一提,它会从保存视频的文件名中去掉非 ASCII 字符,以此处理 Unicode 视频标题。它还可以接受任意数量的 YouTube 网址并全部下载。最后,它把自己的用户代理(User-Agent)伪装成 Mac 版 Chrome(也就是我目前使用的浏览器)。
#!/usr/bin/env python3
import sys
import urllib.request
from urllib.request import urlopen, FancyURLopener
from urllib.parse import urlparse, parse_qs, unquote
class UndercoverURLopener(FancyURLopener):
    """Legacy URL opener that identifies itself as Chrome on Mac OS X."""

    # URLopener subclasses advertise this string as their user agent.
    version = "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/5.0.342.9 Safari/533.2"


# Kept from the original for the legacy opener interface.
# NOTE(review): `_urlopener` is a private hook and does not appear to be
# consulted by urlopen(), so on its own it does not change the User-Agent of
# the requests this script makes.
urllib.request._urlopener = UndercoverURLopener()

# urlopen() sends its requests through the opener registered with
# install_opener(), so install one that carries the same User-Agent.
_undercover_opener = urllib.request.build_opener()
_undercover_opener.addheaders = [("User-Agent", UndercoverURLopener.version)]
urllib.request.install_opener(_undercover_opener)
def youtube_download(video_url):
    """Download the video behind *video_url* into the current directory.

    The video id is read from the ``v`` query parameter, the access token
    and title from the ``get_video_info`` response, and the video is saved
    as ``<ascii-only title>.mp4`` (or ``<video id>.mp4`` when no usable
    title is available).

    Every failure (no ``v`` parameter, info request failing, no token in
    the response, download or write error) is reported on stdout and the
    video is skipped, so one bad URL does not abort a batch.

    Returns None.
    """
    try:
        video_id = parse_qs(urlparse(video_url).query)['v'][0]
    except KeyError:
        print("\t No video id ('v' parameter) in '{}'.".format(video_url))
        print("\t Skipping...")
        return

    try:
        url_data = urlopen('http://www.youtube.com/get_video_info?&video_id=' + video_id).read()
    except OSError as e:
        print("\t Could not fetch video info! {}".format(str(e)))
        print("\t Skipping...")
        return

    # parse_qs() percent-decodes each value itself. Unquoting the whole
    # response first would turn an encoded '&' or '=' inside a value (e.g. in
    # the title) into a separator and split the value apart.
    url_info = parse_qs(url_data.decode('utf-8'))
    if 'token' not in url_info:
        print("\t No token in the video info for '{}'.".format(video_id))
        print("\t Skipping...")
        return
    token_value = url_info['token'][0]
    download_url = "http://www.youtube.com/get_video?video_id={0}&t={1}&fmt=18".format(
        video_id, token_value)

    video_title = url_info['title'][0] if 'title' in url_info else ''
    # Unicode filenames are more trouble than they're worth
    base_name = video_title.encode('ascii', 'ignore').decode('ascii').replace("/", "-")
    # A missing or fully non-ASCII title would otherwise yield just ".mp4".
    filename = (base_name or video_id) + '.mp4'

    print("\t Downloading '{}' to '{}'...".format(video_title, filename))

    try:
        download = urlopen(download_url).read()
        # The context manager closes the file even if the write fails.
        with open(filename, 'wb') as f:
            f.write(download)
    except Exception as e:
        print("\t Downlad failed! {}".format(str(e)))
        print("\t Skipping...")
    else:
        print("\t Done.")
def main():
    """Download every video URL given on the command line.

    When no URL was passed as an argument, prompt for a space-separated
    list of URLs instead.
    """
    print("\n--------------------------")
    print (" Youtube Video Downloader")
    print ("--------------------------\n")

    # Slicing never raises: with no arguments sys.argv[1:] is simply empty,
    # so emptiness (not an exception) is what triggers the prompt.
    video_urls = sys.argv[1:]
    if not video_urls:
        # split() turns the typed line into a list of URLs; iterating the
        # raw string would hand single characters to youtube_download().
        video_urls = input('Enter (space-separated) video URLs: ').split()

    for u in video_urls:
        youtube_download(u)
    print("\n Done.")


if __name__ == '__main__':
    main()
答案 1 :(得分:6)
请允许我厚着脸皮推荐一下我的脚本(my script):它会自动检查可用的格式,自动为视频选择最佳质量的格式,并且同时适用于 YouTube 页面的 Flash 和 HTML5 版本(也支持 Vimeo)。
如果您编写了该脚本,那么请查看我的源代码以获取灵感,并随意窃取一些代码。我挑战你,请写一些更好的东西。开源在竞争中茁壮成长!
但是,如果您复制了该脚本并且只是想让它运行起来,我建议您尝试一下我的脚本,看看它是否适合您。您可以从命令行作为脚本访问它,也可以作为另一个python文件中的模块访问它。
(编辑:制作wiki。不寻找声誉。)
答案 2 :(得分:4)
您还可以查看用Python编写的youtube-dl
并查看其编写方式:
答案 3 :(得分:3)
看起来 YouTube 已经更改了访问视频文件的算法。他们现在使用“签名”(signature)变量来代替“令牌”(token),而“签名”似乎取决于 cookie 中存储的数据,或者取决于客户端的 IP 地址(对于不支持 cookie 的客户端,例如 Python 2 中的 urllib)。下面是我想出的一个变通办法(得到的 URL 与 IP 绑定):
#!/usr/bin/python
import re
from urlparse import *
from urllib import *
def yt_url(video_url):
    """Return ``(title, stream_url)`` for the YouTube watch URL *video_url*.

    The video id is taken from the ``v`` query parameter and looked up via
    ``get_video_info``. The whole response is unquoted before it is parsed.
    The stream URL is the base address found in the ``id`` field followed by
    one ``&name=value`` pair per collected parameter.
    """
    query = urlparse(video_url).query
    video_id = parse_qs(query)['v'][0]
    info_url = "http://www.youtube.com/get_video_info?video_id=" + video_id
    get_vars = parse_qs(unquote(urlopen(info_url).read()))

    # The base address is the second "|" piece of the second "," piece of
    # the "id" field.
    base = get_vars["id"][0].split(",")[1].split("|")[1]

    # Parameters are collected in this order; "id" keeps only the part of
    # the field before the first comma, "ipbits" is a fixed value.
    elements = {}
    for name in ("itag", "sver", "expire", "signature", "factor"):
        elements[name] = get_vars[name][0]
    elements["id"] = get_vars["id"][0].split(",")[0]
    for name in ("key", "burst", "sparams", "algorithm"):
        elements[name] = get_vars[name][0]
    elements["ipbits"] = "8"

    pairs = ["&" + name + "=" + value for name, value in elements.items()]
    return (get_vars["title"][0], base + "".join(pairs))
if __name__ == '__main__':
    # Demo run: resolve one hard-coded video and print what yt_url() found.
    result = yt_url("http://www.youtube.com/watch?v=4tAr7tuakt0")
    print("Title: %s" % (result[0],))
    print("Video: %s" % (result[1],))
答案 4 :(得分:0)
#!/usr/bin/env python
import urllib2,urllib
import re
import os
import sys
import time
# Ask for the page URL and keep a copy of the page as index.html; the title
# lookup later in this script re-reads that file.
linkurl=raw_input('Enter url:')
linkurl1=urllib.urlopen(linkurl).read()
# The context manager closes the file even if the write fails.
with open("index.html","w") as file1:
    file1.write(linkurl1)
fname = 'index.html'
## Giving new matrix value to find
find=("yt.playerConfig =",'"title":')
## File reading programme
# Keep the last line that contains one of the markers (case-insensitive).
y = None
with open(fname) as infile:
    for line in infile:
        lline = line.lower()
        if any(word.lower() in lline for word in find):
            y=line.rstrip()
if y is None:
    # Without this check the script would stop further down with a NameError
    # on `y`, which says nothing about the actual cause.
    print("Could not find the player configuration in the downloaded page.")
    sys.exit(1)
fileurl=y
# Undo the percent-encoding of "://", "/", "?", "=" and "&", turn the
# double-encoded "%252" back into "%2", then rewrite every "sig" as
# "&signature".
y7 = (y.replace("%3A%2F%2F","://")
       .replace("%2F","/")
       .replace("%3F","?")
       .replace("%3D","=")
       .replace("%26","&")
       .replace("%252","%2")
       .replace("sig","&signature"))
# Show the itag cheat sheet (itag number = resolution and container), list
# the itags found in the player configuration line, and ask which to fetch.
resolution_table = (
    "",
    "Video resolution:",
    "[46=1080(.webm)]--[37=1080(.mp4)]--[35=480(.flv)]--[36=180(.3gpp)]",
    "[45=720(.webm) ]--[22=720(.mp4) ]--[34=360(.flv)]--[17=144(.3gpp)]",
    "[44=480(.webm) ]--[18=360(.mp4) ]--[5=240(.flv) ]",
    "[43=360(.webm) ]",
    "",
)
for table_row in resolution_table:
    print(table_row)

# Every run of digits that follows "itag=" in the configuration line.
itag = re.findall('itag=(\d+)',y)
# Both the label and the list are printed as their repr, as before.
print(repr("itag list= ") + repr(itag))
resol=raw_input("Type itag number: ")
# Derive the output file name from the saved page: take a line containing
# one of the title markers and pull the last double-quoted value out of the
# text before its final ">".
# NOTE(review): the original indentation was lost; this assumes the two
# split() steps ran for every matching line, so the last match wins.
fname = 'index.html'
find = (' <title>','</title>')
markers = [word.lower() for word in find]
with open(fname) as page:
    for raw_line in page:
        folded = raw_line.lower()
        if not any(marker in folded for marker in markers):
            continue
        y = raw_line.rstrip()
        fileurl1 = y.split(">")[-2]
        filename2 = fileurl1.split('"')[-2]
# Build the download URL (`url`) and file extension (`ext`) for the chosen
# itag. Each itag maps to:
#   (quality label that ends its stream entry,
#    pattern that ends the url field inside that entry,
#    file extension)
# The patterns are the ones the per-itag branches used; only the parts that
# differed between the branches live in the table.
_URL_ENDS_AT_TYPE = r'\\u0026type'
_URL_ENDS_AT_BACKSLASH = r'\\'
_ITAG_FORMATS = {
    '46': ('hd1080', _URL_ENDS_AT_TYPE, ".webm"),
    '37': ('hd1080', _URL_ENDS_AT_TYPE, ".mp4"),
    '45': ('hd720', _URL_ENDS_AT_TYPE, ".webm"),
    '22': ('hd720', _URL_ENDS_AT_TYPE, ".mp4"),
    '44': ('large', _URL_ENDS_AT_TYPE, ".webm"),
    '35': ('large', _URL_ENDS_AT_BACKSLASH, ".flv"),
    '43': ('medium', _URL_ENDS_AT_BACKSLASH, ".webm"),
    '34': ('medium', _URL_ENDS_AT_BACKSLASH, ".flv"),
    '18': ('medium', _URL_ENDS_AT_BACKSLASH, ".mp4"),
    '5': ('small', _URL_ENDS_AT_BACKSLASH, ".flv"),
    '36': ('small', _URL_ENDS_AT_BACKSLASH, ".3gpp"),
    '17': ('small', _URL_ENDS_AT_BACKSLASH, ".3gpp"),
}

if resol not in _ITAG_FORMATS:
    # Previously an unknown itag matched no branch and the script died later
    # with a NameError on `url`.
    print("Unsupported itag: " + repr(resol))
    sys.exit(1)

_quality, _url_end, ext = _ITAG_FORMATS[resol]
# Text between "itag=<n>" and the quality label of that entry.
_entry = re.findall(r'itag=' + resol + r'(.*?)\u0026quality=' + _quality, y7)
# The remaining searches run on the repr of the match list (what the
# backtick expressions produced), in which each backslash is doubled.
_entry_repr = repr(_entry)
_url_matches = re.findall(r'\\u0026url=(.*?)' + _url_end, _entry_repr)
signature = re.findall(r'signature=(.*?)\\', _entry_repr)
try:
    url = (repr(_url_matches).split("\\")[0].split("'")[1]
           + "&signature=" + repr(signature).split("'")[1]
           + "&ptk=machinima")
except IndexError:
    # An empty match list has no quoted element to pick out.
    print("No stream found for itag " + repr(resol))
    sys.exit(1)
# Download `url` into "<title><ext>" in 10 KiB chunks, showing a one-line
# progress status (amount received, percentage, speed of the last chunk,
# estimated minutes remaining).
print(url)
filename=filename2+ext
print(filename)
req = urllib2.Request(url, headers={'Range':"bytes=0-838860800"})
data = urllib2.urlopen(req)
print("connected to ""http://"+url.split("/")[2]+"/")
meta_data = data.info()
file_size = int(meta_data.getheaders("Content-Length")[0])
print("filesize= "+repr(file_size//1048576)+" MB")
bytes_received = 0
chunk_size = 10240
f=open(filename,'wb')
try:
    while True:
        start_time = time.time()
        chunk = data.read(chunk_size)
        if not chunk:
            break
        bytes_received += len(chunk)
        f.write(chunk)
        # A chunk can arrive within one clock tick, giving an elapsed time
        # of 0; the floor keeps the divisions below from raising
        # ZeroDivisionError.
        Td=max(time.time() - start_time, 1e-6)
        speed1=round(len(chunk)/1024.0,1)
        speed=round(speed1/Td,1)            # KB/s
        speed_MB=round(speed/1024.0,1)
        speed_GB=round(speed_MB/1024.0,1)
        bytes_received_MB=round(bytes_received/1048576.0,3)
        percent = bytes_received * 100. / file_size
        # Pick the unit by magnitude; Tr is the remaining time in minutes.
        if speed < 1:
            speed_byte=round(len(chunk)/Td,1)
            Tr=(file_size-bytes_received)/(60*speed_byte)
            status = r"[Downloaded=%.3f MB] [%3.2f%%] [speed= %.1f B/s] [eta %1d min] " % (bytes_received_MB, percent,speed_byte,Tr)
        elif speed < 1024:
            Tr=(file_size-bytes_received)/(60*1024*speed)
            status = r"[Downloaded=%.3f MB] [%3.2f%%] [speed= %.1f KB/s] [eta %1d min] " % (bytes_received_MB, percent,speed,Tr)
        elif speed < 1048576 :
            Tr=(file_size-bytes_received)/(60*1024*1024*speed_MB)
            status = r"[Downloaded=%.3f MB] [%3.2f%%] [speed= %.1f MB/s] [eta %1d min] " % (bytes_received_MB, percent,speed_MB,Tr)
        else :
            Tr=(file_size-bytes_received)/(60*1024*1024*1024*speed_GB)
            status = r"[Downloaded=%.3f MB] [%3.2f%%] [speed= %.1f GB/s] [eta %1d min] " % (bytes_received_MB, percent,speed_GB,Tr)
        # Backspaces return the cursor to the start of the status so the next
        # one overwrites it; flush because no newline is written.
        sys.stdout.write(status + chr(8) * len(status))
        sys.stdout.flush()
finally:
    # Close the output file whether the transfer finished or failed.
    f.close()