我刚刚发现了这个很棒的wget包装器,想用subprocess模块把它重写成Python脚本。然而,事实证明这给我带来了各种各样的错误。
# download URL
#
# Fetch URL with wget and show the progress as a single percentage that is
# rewritten in place on stdout, followed by " DONE" when the pipeline ends.
# NOTE: " DONE" is printed whether or not wget succeeded; the pipeline's exit
# status is not inspected.
download()
{
    local url=$1
    echo -n " "
    # wget reports progress on stderr, hence 2>&1.  grep keeps only the lines
    # that carry a percentage, sed strips the progress dots so the percentage
    # becomes the second field, and awk reprints it over the previous value
    # using backspaces.
    # "$url" is quoted so that '?', '*' or whitespace in the URL is not
    # subject to pathname expansion or word splitting.
    wget --progress=dot "$url" 2>&1 | grep --line-buffered "%" | \
        sed -u -e "s,\.,,g" | awk '{printf("\b\b\b\b%4s", $2)}'
    echo -ne "\b\b\b\b"
    echo " DONE"
}
然后可以像这样调用:
# Example: announce the file name (no trailing newline), then let
# download() append the live percentage to the same line.
file="patch-2.6.37.gz"
printf '%s' "Downloading $file:"
download "http://www.kernel.org/pub/linux/kernel/v2.6/$file"
有什么想法吗?
来源:http://fitnr.com/showing-file-download-progress-using-wget.html
答案 0 :(得分:4)
我觉得你已经很接近了。我主要想知道的是:既然这些处理都可以在Python内部完成,
为什么还要把输出通过管道传给grep、sed和awk呢?
#! /usr/bin/env python
"""Download a file with wget and show its progress as an in-place percentage."""
import re
import subprocess
import sys

TARGET_FILE = "linux-2.6.0.tar.xz"
TARGET_LINK = "http://www.kernel.org/pub/linux/kernel/v2.6/%s" % TARGET_FILE

wgetExecutable = '/usr/bin/wget'
wgetParameters = ['--progress=dot', TARGET_LINK]

# wget writes its progress report to stderr, so fold stderr into the pipe.
wgetPopen = subprocess.Popen([wgetExecutable] + wgetParameters,
                             stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

# The pipe yields bytes, so the pattern is a bytes pattern as well (this keeps
# the script working on both Python 2 and Python 3).
percentPattern = re.compile(br'\d+%')

for line in iter(wgetPopen.stdout.readline, b''):
    match = percentPattern.search(line)
    if match:
        # Fixed-width field so that four backspaces always erase exactly the
        # previous value; flush because no newline is ever written and the
        # update would otherwise sit in the output buffer.
        sys.stdout.write('\b\b\b\b' + match.group(0).decode('ascii').rjust(4))
        sys.stdout.flush()

wgetPopen.stdout.close()
wgetPopen.wait()
# Terminate the progress line.
sys.stdout.write('\n')
答案 1 :(得分:2)
如果你要用Python重写这个脚本,那么可以用urllib.urlretrieve()来代替wget:
#!/usr/bin/env python
import os
import posixpath
import sys
import urllib
import urlparse
def url2filename(url):
    """Return basename corresponding to url.

    The query string and fragment are ignored and percent-escapes in the
    path are decoded before the last path component is taken.

    >>> url2filename('http://example.com/path/to/file?opt=1')
    'file'

    Raises ValueError if the decoded name still contains a path separator
    for the current platform.
    """
    # Local imports so the function works with both module layouts.
    try:  # Python 3
        from urllib.parse import unquote, urlsplit
    except ImportError:  # Python 2
        from urllib import unquote
        from urlparse import urlsplit

    urlpath = urlsplit(url).path  # pylint: disable=E1103
    basename = posixpath.basename(unquote(urlpath))
    if os.path.basename(basename) != basename:
        # refuse 'dir%5Cbasename.ext' on Windows
        raise ValueError("unsafe file name in url: %r" % (url,))
    return basename
def reporthook(blocknum, blocksize, totalsize):
    """Report download progress on stderr.

    blocknum  -- number of blocks transferred so far.
    blocksize -- size of one block in bytes.
    totalsize -- total size in bytes; a value <= 0 means it is unknown.

    With a known total, the same line is rewritten (leading carriage return)
    as "<percent>% <read> / <total>" and a newline is written once the total
    has been reached.  With an unknown total, one "read <n>" line is written
    per call.
    """
    if totalsize > 0:
        # The last block is usually only partly filled, so blocknum * blocksize
        # can exceed the real size; clamp to avoid reporting more than 100%.
        readsofar = min(blocknum * blocksize, totalsize)
        percent = readsofar * 1e2 / totalsize
        s = "\r%5.1f%% %*d / %d" % (
            percent, len(str(totalsize)), readsofar, totalsize)
        sys.stderr.write(s)
        if readsofar >= totalsize:  # near the end
            sys.stderr.write("\n")
    else:  # total size is unknown
        sys.stderr.write("read %d\n" % (blocknum * blocksize,))
# Command line: <url> [<filename>].  Without an explicit file name the
# basename of the URL path is used.
url = sys.argv[1]
if len(sys.argv) > 2:
    filename = sys.argv[2]
else:
    filename = url2filename(url)
urllib.urlretrieve(url, filename, reporthook)
示例:
$ python download-file.py http://example.com/path/to/file
将url下载到文件中。如果没有给出文件,那么它使用url中的basename。
如果需要,您还可以运行wget
:
#!/usr/bin/env python
import sys
from subprocess import Popen, PIPE, STDOUT
def urlretrieve(url, filename=None, width=4):
    """Download url with wget, showing progress as an in-place percentage.

    url      -- address handed to wget.
    filename -- output path passed as ``wget -O``; when None, no -O option
                is given and wget chooses the name itself.
    width    -- width of the percentage field and number of backspaces used
                to overwrite the previous value.

    The percentage is written to stderr; once wget's output ends, "DONE" is
    printed on stdout.
    """
    destination = ["-O", filename] if filename is not None else []
    # Default buffering is used on purpose: bufsize=1 (line buffering) is only
    # defined for text-mode pipes, and this pipe is read as bytes.
    p = Popen(["wget"] + destination + ["--progress=dot", url],
              stdout=PIPE, stderr=STDOUT)
    for line in iter(p.stdout.readline, b''):
        if b'%' not in line:  # grep "%"
            continue
        # sed -u -e "s,\.,,g" followed by awk's field splitting
        fields = line.replace(b'.', b'').split(None, 2)
        if len(fields) < 2:  # no $2 on this line: nothing to show
            continue
        percents = fields[1].decode('utf-8', 'replace')  # awk $2
        sys.stderr.write("\b"*width + percents.rjust(width))
        # No newline is written, so flush explicitly to make the update
        # visible immediately.
        sys.stderr.flush()
    p.communicate()  # close stdout, wait for child's exit
    print("\b"*width + "DONE")
# Command line: <url> [<filename>].  Without a file name, wget picks one.
url = sys.argv[1]
if len(sys.argv) > 2:
    filename = sys.argv[2]
else:
    filename = None
urlretrieve(url, filename)
我没有注意到此代码存在任何缓冲问题。
答案 2 :(得分:2)
之前我做过类似的事。我很乐意与你分享我的代码:)
#!/usr/bin/python2.7
# encoding=utf-8
import sys
import os
import datetime
SHEBANG = "#!/bin/bash\n\n"
def get_cmd(editor='vim', initial_cmd=""):
    """Let the user edit a command in an external editor and return it.

    editor      -- program to run; it is invoked as ``editor <tempfile>``.
    initial_cmd -- text the temporary file is pre-filled with.

    Returns the content of the file after the editor exits with status 0,
    or None if the editor exits with a non-zero status.  The temporary file
    is removed in both cases.
    """
    from subprocess import call
    from tempfile import NamedTemporaryFile

    # Create the initial temporary file.  Text mode, so that a str command
    # can be written on both Python 2 and Python 3.
    with NamedTemporaryFile(mode='w', delete=False) as tf:
        tfName = tf.name
        tf.write(initial_cmd)
    try:
        # Fire up the editor.
        if call([editor, tfName], shell=False) != 0:
            # Editor died or was killed.
            return None
        # Get the modified content.
        with open(tfName) as fd:
            return fd.read()
    finally:
        # Clean up on every path, including the editor-failure one.
        os.remove(tfName)
def main():
    """Edit a wget command in vim, optionally save it as a script, then run it.

    The editor is pre-filled with "wget " followed by the first command-line
    argument, if there is one.  When the first argument is 's', the edited
    command is also written to an executable "swget_<timestamp>" script in
    the current directory.  Nothing is saved or run if the editor fails.
    """
    args = sys.argv[1:]
    # The argument is optional, so do not index sys.argv unconditionally.
    # NOTE(review): the first argument is also compared with 's' below, so
    # running with 's' pre-fills the editor with "wget s" -- confirm whether
    # that is intended.
    initial_cmd = "wget " + (args[0] if args else "")
    cmd = get_cmd(editor='vim', initial_cmd=initial_cmd)
    if cmd is None:
        # get_cmd() returns None when the editor exits with an error.
        return
    if args and args[0] == 's':
        # keep the download information.
        t = datetime.datetime.now()
        filename = "swget_%02d%02d%02d%02d%02d" % (
            t.month, t.day, t.hour, t.minute, t.second)
        with open(filename, 'w') as f:
            f.write(SHEBANG)
            f.write(cmd)
        # 0o777 is the octal spelling accepted by both Python 2.6+ and 3.
        os.chmod(filename, 0o777)
    os.system(cmd)
main()
# Run this script with the optional argument 's'.
# Edit the command in the editor, then save and quit; the download
# then begins. If the argument 's' was given, the script also creates
# another executable script that can be used to resume an interrupted
# download (if the server supports it).
所以,基本上,你只需要修改initial_cmd的值,在你的情况下,它是
wget --progress=dot $url 2>&1 | grep --line-buffered "%" | \
sed -u -e "s,\.,,g" | awk '{printf("\b\b\b\b%4s", $2)}'
此脚本将首先创建一个临时文件,然后将shell命令放入其中,并赋予其执行权限。最后运行带有命令的临时文件。
答案 3 :(得分:1)
vim download.py
#!/usr/bin/env python
"""Run the shell ``download`` progress wrapper by piping it to bash.

The name of the file to fetch is read by the shell code from the ``file``
environment variable.
"""
import subprocess
import os

# Shell program fed to the interpreter on stdin.  "$url" is quoted so that
# special characters in the URL are not expanded or split by the shell.
sh_cmd = r"""
download()
{
local url=$1
echo -n " "
wget --progress=dot "$url" 2>&1 |
grep --line-buffered "%" |
sed -u -e "s,\.,,g" |
awk '{printf("\b\b\b\b%4s", $2)}'
echo -ne "\b\b\b\b"
echo " DONE"
}
download "http://www.kernel.org/pub/linux/kernel/v2.6/$file"
"""

# `local` and `echo -ne` are bash features, so start bash explicitly instead
# of whatever `sh` happens to be.  An argument list needs no extra shell.
cmd = ['bash']

# universal_newlines=True opens stdin in text mode, so the str program above
# can be passed to communicate() on Python 3 as well as Python 2.
p = subprocess.Popen(cmd,
                     stdin=subprocess.PIPE,
                     universal_newlines=True,
                     env=os.environ)
p.communicate(input=sh_cmd)

# Variants:
#   * pass env={'file': 'xx'} instead of os.environ to set the file name
#     from Python rather than from the caller's environment;
#   * add stdout=subprocess.PIPE and stderr=subprocess.PIPE and use
#     `stdout, stderr = p.communicate(input=sh_cmd)` to capture the output
#     instead of letting it go to the terminal.
然后你可以这样调用:
file="xxx" python download.py
答案 4 :(得分:0)
简单来说,假设你有一个script.sh文件,你可以执行它并打印它的返回码(如果有的话):
import subprocess

# Run the script through the shell and collect its output.  communicate()
# drains the stdout pipe while waiting for the child; calling wait() with
# stdout=PIPE can deadlock once the child has filled the OS pipe buffer.
process = subprocess.Popen('/path/to/script.sh', shell=True, stdout=subprocess.PIPE)
output, _ = process.communicate()
# print() with a single argument behaves the same on Python 2 and Python 3.
print(process.returncode)