我正在尝试使用Python 3从远程TGZ存档下载单个文本文件,但目前我只能在内存中下载整个TGZ,然后使用tarfile作为本地文件,问题是我不能在使用远程文件时使用模式r:gz
并且不允许我提取单个文件,因为不允许随机访问(使用r|gz
不要让我打开远程存档);我的工作代码如下:
#!/usr/bin/python
''' This script reads a configuration TOML file from a TGZ remote compressed archive '''
# import urllib.request
import tarfile
import sys
import argparse
# from lzma import LZMAFile
import requests
import io
def main(args):
''' gets the TOML data from the TAR archive '''
# tgz_file_url example: http://localhost/npm/creard-npm/@creard/helloworldmodule/-/helloworldmodule-1.0.0.tgz
tar_stream = requests.get(args.tgz_file_url)
tar_file = tarfile.open(fileobj=io.BytesIO(tar_stream.content), mode="r:gz")
# toml_path example: package/assets/config/data/module_setup.toml
tar_member = tar_file.getmember(args.toml_path)
data = tar_file.extractfile(tar_member).read()
sys.stdout.write(str(data))
def arguments():
''' gets arguments '''
parser = argparse.ArgumentParser(
description="Script for that reads a configuration TOML \
file from a TGZ remote compressed archive"
)
parser.add_argument(
"--toml_path", help="The TOML file path inside the TGZ archive",
type=str,
required=True)
parser.add_argument(
"--tgz_file_url", help="The URL of the remote TGZ archive",
type=str,
default=sys.stdin.read())
return parser.parse_args()
ARGS = arguments()
main(ARGS)
我认为在这个thread中是可行的,但我不想依赖于Java