在Linux / bash
下,如何获取其内容目录的纯文本表示? (请注意,通过"纯文本"这里我的意思是" UTF-8")。
换句话说,我怎么能"打包"或"存档"一个目录(包含内容 - 包括二进制文件)作为纯文本文件 - 这样我就可以解包"稍后,并获取与其内容相同的目录?
答案 0 :(得分:0)
我对此感兴趣了一段时间,我想我终于设法制作了一个适用于Python 2.7和3.4的脚本 - 但是,我仍然想知道是否还有其他内容做同样的事。这里是一个要点(还有一些评论):
https://gist.github.com/anonymous/1a68bf2c9134fd5312219c8f68713632
否则,我在这里(下方)发布一个略有删节的版本以供参考。
用法是:归档/打包成.json文本文件:
python archdir2text-json.py -a /tmp > myarchdir.json
...并从.json文本文件解压缩到当前(调用)目录:
python archdir2text-json.py -u myarchdir.json
二进制文件作为base64处理。
这是脚本:
<强> archdir2text-json.py
强>
#!/usr/bin/env python
import pprint, inspect
import argparse
import os
import stat
import errno
import base64
import codecs
class SmartDescriptionFormatter(argparse.RawDescriptionHelpFormatter):
def _fill_text(self, text, width, indent):
if text.startswith('R|'):
paragraphs = text[2:].splitlines()
rebroken = [argparse._textwrap.wrap(tpar, width) for tpar in paragraphs]
rebrokenstr = []
for tlinearr in rebroken:
if (len(tlinearr) == 0):
rebrokenstr.append("")
else:
for tlinepiece in tlinearr:
rebrokenstr.append(tlinepiece)
return '\n'.join(rebrokenstr)
return argparse.RawDescriptionHelpFormatter._fill_text(self, text, width, indent)
textchars = bytearray({7,8,9,10,12,13,27} | set(range(0x20, 0x100)) - {0x7f})
is_binary_string = lambda bytes: bool(bytes.translate(None, textchars))
cwd = os.getcwd()
if os.name == 'nt':
import win32api, win32con
def folder_is_hidden(p):
if os.name== 'nt':
attribute = win32api.GetFileAttributes(p)
return attribute & (win32con.FILE_ATTRIBUTE_HIDDEN | win32con.FILE_ATTRIBUTE_SYSTEM)
else:
return os.path.basename(p).startswith('.') #linux-osx
def path_hierarchy(path):
hierarchy = {
'type': 'folder',
'name': os.path.basename(path),
'path': path,
}
try:
cleared_contents = [contents
for contents in os.listdir(path)
if not(
os.path.isdir(os.path.join(path, contents))
and
folder_is_hidden(os.path.join(path, contents))
)]
hierarchy['children'] = [
path_hierarchy(os.path.join(path, contents))
for contents in cleared_contents
]
except OSError as e:
if e.errno == errno.ENOTDIR:
hierarchy['type'] = 'file'
else:
hierarchy['type'] += " " + str(e)
if hierarchy['type'] == 'file':
isfifo = stat.S_ISFIFO(os.stat(hierarchy['path']).st_mode)
if isfifo:
ftype = "fifo"
else:
try:
data = open(hierarchy['path'], 'rb').read()
ftype = "bin" if is_binary_string(data) else "txt"
if (ftype == "txt"):
hierarchy['content'] = data.decode("utf-8")
else:
hierarchy['content'] = base64.b64encode(data).decode("utf-8")
except Exception as e:
ftype = str(e)
hierarchy['ftype'] = ftype
return hierarchy
def recurse_unpack(inobj, relpath=""):
if (inobj['type'] == "folder"):
rpname = relpath + inobj['name']
sys.stderr.write("folder name: " + rpname + os.linesep);
os.mkdir(rpname)
for tchild in inobj['children']:
recurse_unpack(tchild, relpath=relpath+inobj['name']+os.sep)
elif (inobj['type'] == "file"):
rfname = relpath + inobj['name']
sys.stderr.write("file name: " + rfname + os.linesep)
if inobj['ftype'] == "txt":
with codecs.open(rfname, "w", "utf-8") as text_file:
text_file.write(inobj['content'])
elif inobj['ftype'] == "bin":
with open(rfname, "wb") as bin_file:
bin_file.write(base64.b64decode(inobj['content']))
if __name__ == '__main__':
import json
import sys
parser = argparse.ArgumentParser(formatter_class=SmartDescriptionFormatter, description="""R|Command-line App that packs/archives (and vice-versa) a directory to a plain-text .json file; should work w/ both Python 2.7 and 3.4
see full help text in https://gist.github.com/anonymous/1a68bf2c9134fd5312219c8f68713632""")
parser.add_argument('input_paths', type=str, nargs='*', default=['.'],
help='Paths to files/directories to include in the archive; or path to .json archive file')
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('-a', '--archive', action='store_true', help="Interpret input_paths as paths to files/directories, and archive them to a .json file (output to stdout)")
group.add_argument('-u', '--unpack', action='store_true', help="Interpret input_paths as path to an archive .json file, and unpack it in the current directory")
args = parser.parse_args()
if (args.archive):
valid_input_paths = []
for p in args.input_paths:
if os.path.isdir(p) or os.path.exists(p):
valid_input_paths.append(p)
else:
sys.stderr.write("Ignoring invalid input path: " + p + os.linesep)
sys.stderr.write("Encoding input path(s): " + str(valid_input_paths) + os.linesep)
path_hier_arr = [path_hierarchy(vp) for vp in valid_input_paths]
outjson = json.dumps(path_hier_arr, indent=2, sort_keys=True, separators=(',', ': '))
print(outjson)
elif (args.unpack):
valid_input_paths = []
for p in args.input_paths:
if os.path.isdir(p) or os.path.exists(p):
valid_input_paths.append(p)
else:
sys.stderr.write("Ignoring invalid input path: " + p + os.linesep)
for vp in valid_input_paths:
with open(vp) as data_file:
data = json.load(data_file)
for datachunk in data:
recurse_unpack(datachunk)