将目录内容存档/打包为纯文本表示?

时间:2017-03-31 15:29:22

标签: linux bash

在Linux / bash下,如何获取一个目录及其内容的纯文本表示?(请注意,这里所说的"纯文本"是指UTF-8。)

换句话说,我怎样才能把一个目录(连同其内容,包括二进制文件)"打包"或"存档"为一个纯文本文件,以便稍后"解包",并得到内容完全相同的目录?

1 个答案:

答案 0 :(得分:0)

我对这个问题感兴趣已有一段时间,我想我终于写出了一个同时适用于Python 2.7和3.4的脚本 - 不过,我仍然想知道是否还有其他工具能做同样的事。这里是一个gist(附有更多注释):

https://gist.github.com/anonymous/1a68bf2c9134fd5312219c8f68713632

否则,我在这里(下方)发布一个略有删节的版本以供参考。

用法是:归档/打包成.json文本文件:

python archdir2text-json.py -a /tmp > myarchdir.json

...并从.json文本文件解压缩到当前(调用)目录:

python archdir2text-json.py -u myarchdir.json

二进制文件作为base64处理。

这是脚本:

<strong>archdir2text-json.py</strong>

#!/usr/bin/env python

import argparse
import base64
import codecs
import errno
import inspect
import os
import pprint
import stat
import sys
import textwrap

class SmartDescriptionFormatter(argparse.RawDescriptionHelpFormatter):
  """Help formatter that re-wraps descriptions carrying an ``R|`` prefix.

  Text starting with ``R|`` is split into lines, each line is wrapped to the
  available width on its own, and empty lines are kept as paragraph breaks.
  Any other text is handled by ``RawDescriptionHelpFormatter`` unchanged.
  """

  def _fill_text(self, text, width, indent):
    """Return *text* laid out for a help screen *width* columns wide.

    *indent* is only honoured on the fallback (non-``R|``) path; the ``R|``
    path emits its wrapped lines without indentation.
    """
    if not text.startswith('R|'):
      return argparse.RawDescriptionHelpFormatter._fill_text(self, text, width, indent)

    # Use the public textwrap module: the private alias argparse._textwrap is
    # an implementation detail that newer Python 3 releases no longer expose.
    wrapped_lines = []
    for paragraph in text[2:].splitlines():
      # wrap() yields [] for an empty line; keep that line as a blank one.
      wrapped_lines.extend(textwrap.wrap(paragraph, width) or [""])
    return '\n'.join(wrapped_lines)

# Byte values treated as "text": the control codes BEL, BS, TAB, LF, FF, CR and
# ESC, plus every value from 0x20 through 0xFF except DEL (0x7f).
textchars = bytearray(
  {7, 8, 9, 10, 12, 13, 27} | (set(range(0x20, 0x100)) - {0x7f})
)


def is_binary_string(bytes):
  """Return True if *bytes* holds at least one byte that is not in ``textchars``.

  The parameter keeps its original name (which shadows the builtin) so that
  keyword callers continue to work.
  """
  # Deleting every "text" byte leaves only the bytes that mark the data binary.
  non_text = bytes.translate(None, textchars)
  return len(non_text) > 0


# Working directory at the moment the module is loaded.
cwd = os.getcwd()

if os.name == 'nt':
  # The pywin32 modules are only imported when running on Windows.
  import win32api
  import win32con


def folder_is_hidden(p):
  """Tell whether the path *p* counts as hidden on the current platform.

  On Windows the result is the file-attribute value masked with the HIDDEN
  and SYSTEM flags (non-zero means hidden); elsewhere it is True when the
  last path component starts with a dot.
  """
  if os.name != 'nt':
    # linux-osx: dot-prefixed names are the hidden ones
    return os.path.basename(p).startswith('.')
  hidden_or_system = win32con.FILE_ATTRIBUTE_HIDDEN | win32con.FILE_ATTRIBUTE_SYSTEM
  return win32api.GetFileAttributes(p) & hidden_or_system

def _read_file_entry(path):
  """Classify the non-directory entry at *path* and load its payload.

  Returns a ``(ftype, content)`` pair. *ftype* is "fifo", "txt", "bin", or the
  text of the I/O error that prevented reading; *content* is the decoded text
  ("txt"), the base64 text ("bin"), or None when nothing was read.
  """
  try:
    if stat.S_ISFIFO(os.stat(path).st_mode):
      # FIFOs are recorded by type only and never opened (presumably because
      # opening one for reading would block until a writer appears).
      return "fifo", None
    with open(path, 'rb') as infile:
      data = infile.read()
  except (IOError, OSError) as e:
    # Best effort: keep the entry in the archive and record why it is empty.
    return str(e), None

  if not is_binary_string(data):
    try:
      return "txt", data.decode("utf-8")
    except UnicodeDecodeError:
      # High bytes count as "text" for is_binary_string but need not be valid
      # UTF-8 (e.g. Latin-1 files); store those as base64 instead of dropping
      # their content.
      pass
  return "bin", base64.b64encode(data).decode("utf-8")


def path_hierarchy(path):
  """Build a JSON-serialisable description of *path* and everything below it.

  The result is a dict with the keys 'type', 'name' and 'path'. Directories
  get 'type' == 'folder' and a 'children' list built recursively; hidden
  sub-directories (per ``folder_is_hidden``) are left out. Anything that
  cannot be listed because it is not a directory gets 'type' == 'file', an
  'ftype' entry and, when it could be read, a 'content' entry. If listing
  fails for another reason, the error text is appended to 'type' and no
  children are recorded.
  """
  hierarchy = {
    'type': 'folder',
    # normpath drops a trailing separator, which would otherwise yield an
    # empty name that cannot be recreated when unpacking.
    'name': os.path.basename(os.path.normpath(path)),
    'path': path,
  }
  try:
    entries = os.listdir(path)
  except OSError as e:
    if e.errno == errno.ENOTDIR:
      hierarchy['type'] = 'file'
    else:
      hierarchy['type'] += " " + str(e)
  else:
    children = []
    for entry in entries:
      child_path = os.path.join(path, entry)
      if os.path.isdir(child_path) and folder_is_hidden(child_path):
        continue
      children.append(path_hierarchy(child_path))
    hierarchy['children'] = children

  if hierarchy['type'] == 'file':
    ftype, content = _read_file_entry(path)
    if content is not None:
      hierarchy['content'] = content
    hierarchy['ftype'] = ftype
  return hierarchy

def _safe_member_name(name):
  """Return *name* if it is a single path component, else raise ValueError.

  Rejects names that contain a path separator, carry a drive prefix, or equal
  the parent-directory marker, so an archive entry cannot be written outside
  the directory it is being unpacked into.
  """
  separators = [os.sep]
  if os.altsep:
    separators.append(os.altsep)
  has_separator = any(sep in name for sep in separators)
  if has_separator or name == os.pardir or os.path.splitdrive(name)[0]:
    raise ValueError("Unsafe entry name in archive: " + repr(name))
  return name


def recurse_unpack(inobj, relpath=""):
  """Recreate on disk the entry described by *inobj* (see ``path_hierarchy``).

  *inobj* is a dict with at least 'type' and 'name'. A 'folder' is created with
  ``os.mkdir`` and its 'children' are unpacked into it; a 'file' is written
  from 'content' as UTF-8 text ('ftype' == "txt") or base64-decoded bytes
  ('ftype' == "bin"). Entries of any other type or ftype are skipped. Progress
  is reported on stderr.

  *relpath* is prepended verbatim to the entry name, so it must end with a
  path separator when it names a directory.

  Raises ValueError if an entry name is not a plain single path component.
  """
  kind = inobj['type']
  if kind not in ("folder", "file"):
    # e.g. a folder whose type carries an error text recorded at archive time
    return

  # The archive is external input: never let a name escape the target tree.
  target = relpath + _safe_member_name(inobj['name'])

  if kind == "folder":
    sys.stderr.write("folder name: " + target + os.linesep)
    os.mkdir(target)
    for tchild in inobj['children']:
      recurse_unpack(tchild, relpath=target + os.sep)
    return

  sys.stderr.write("file name: " + target + os.linesep)
  ftype = inobj['ftype']
  if ftype == "txt":
    with codecs.open(target, "w", "utf-8") as text_file:
      text_file.write(inobj['content'])
  elif ftype == "bin":
    with open(target, "wb") as bin_file:
      bin_file.write(base64.b64decode(inobj['content']))
  # Other ftypes ("fifo", or an error text) carry no content; nothing is written.

if __name__ == '__main__':
  import json
  import sys

  def keep_existing(candidates):
    """Return the paths in *candidates* that exist, reporting the others on stderr."""
    kept = []
    for candidate in candidates:
      # A path that is a directory necessarily exists, so one check suffices.
      if os.path.exists(candidate):
        kept.append(candidate)
      else:
        sys.stderr.write("Ignoring invalid input path: " + candidate + os.linesep)
    return kept

  cli = argparse.ArgumentParser(
    formatter_class=SmartDescriptionFormatter,
    description="""R|Command-line App that packs/archives (and vice-versa) a directory to a plain-text .json file; should work w/ both Python 2.7 and 3.4

see full help text in https://gist.github.com/anonymous/1a68bf2c9134fd5312219c8f68713632""")

  cli.add_argument(
    'input_paths', type=str, nargs='*', default=['.'],
    help='Paths to files/directories to include in the archive; or path to .json archive file')

  # Exactly one of the two modes has to be selected on the command line.
  mode = cli.add_mutually_exclusive_group(required=True)
  mode.add_argument(
    '-a', '--archive', action='store_true',
    help="Interpret input_paths as paths to files/directories, and archive them to a .json file (output to stdout)")
  mode.add_argument(
    '-u', '--unpack', action='store_true',
    help="Interpret input_paths as path to an archive .json file, and unpack it in the current directory")

  args = cli.parse_args()

  # Both modes start by discarding input paths that do not exist.
  sources = keep_existing(args.input_paths)

  if args.archive:
    sys.stderr.write("Encoding input path(s): " + str(sources) + os.linesep)
    trees = [path_hierarchy(source) for source in sources]
    print(json.dumps(trees, indent=2, sort_keys=True, separators=(',', ': ')))
  elif args.unpack:
    for archive_path in sources:
      with open(archive_path) as archive_file:
        entries = json.load(archive_file)
      for entry in entries:
        recurse_unpack(entry)