如何编写python脚本以从格式化的驱动器中恢复文件?

时间:2014-04-26 18:20:52

标签: python file-recovery

我想编写一个 Python 脚本,用它从已格式化的驱动器中恢复文件。我知道格式化并不会真正删除驱动器上的数据,而只是把这些空间标记为可覆盖。那么,如何恢复那些尚未被覆盖的文件呢?

3 个答案:

答案 0 :(得分:1)

这样的脚本不太可能奏效,因为 Python 的文件函数以及底层 C 库的文件处理都假定在完好的文件系统之上工作。要恢复数据,需要直接从磁盘读取原始数据。所以,也许你真正应该问的是如何做到这一点。

stackoverflow上的相关python问题:

答案 1 :(得分:1)

也许问题不是关于python脚本而是文件恢复。在这种情况下,根据驱动器的格式和所使用的操作系统,您需要的策略会有所不同。

您也可以尝试完全不使用 Python 来恢复文件,而是利用文件系统和操作系统的特定特性来恢复已删除的文件。

答案 2 :(得分:0)

你可以使用 SleuthKit:

import argparse
import subprocess
import re
import os

# Single-letter entry-type codes looked for in `fls` listings.  The hyphen is
# stored backslash-escaped because recover() joins these codes into a regex
# character class, where a bare '-' could be read as a range operator.
# (Written as '\\-' so the literal is a valid escape sequence.)
TYPECODES = ['\\-', 'r', 'd', 'b', 'l', 'p', 's', 'w', 'v']

# Human-readable label for each code, in the same order as TYPECODES.
# NOTE(review): the SleuthKit fls documentation describes 'd' as a directory
# entry, not a deleted file - confirm before relying on this label.
DESCRIPTIONS = [
    'unknown type',
    'regular file',
    'deleted file',
    'block device',
    'symbolic link',
    'named FIFO',
    'shadow file',
    'whiteout file',
    'TSK virtual file',
]

# Maps the plain code (regex escaping stripped, so '-' rather than '\-') to
# its description.
TYPEDICT = dict(zip((tt.strip('\\') for tt in TYPECODES), DESCRIPTIONS))

# Command-line interface: one positional image path plus optional output
# directory and verbosity switches.
parser = argparse.ArgumentParser(
    description='Recover files from a disk image using SleuthKit',
)
# nargs=1 makes argparse store the value as a one-element list.
parser.add_argument(
    'image', type=str, nargs=1, help='path to disk image or mount point',
)
# nargs='?' lets '-o' appear without a value; const supplies the directory
# used in that case so the result is never None.
parser.add_argument(
    '-o', '--output', type=str, nargs='?', dest='output',
    const='recovered', default='recovered',
    help='output extracted files to this directory [default=./recovered/]',
)
parser.add_argument(
    '-v', '--verbose', dest='verbose', action='store_true',
    default=False, help='print progress message',
)


def recover(imgpath, outpath, verbose=False):
    """Recover entries listed by ``fls`` from a raw disk image.

    Runs ``fls -i raw -p -r`` on *imgpath*, picks out the listing lines that
    carry a ``*`` marker, and for every such entry whose name and metadata
    type codes are both ``r`` or ``d`` dumps its content with
    ``icat -i raw -r`` into a file below *outpath* (parent directories are
    created as needed).  Entries with other type codes are counted as
    skipped.  A summary is printed at the end.

    Args:
        imgpath: Path of the image to read.
        outpath: Directory that receives the recovered files; it is created
            when it does not exist yet.
        verbose: When true, print one line per recovered, failed or skipped
            entry.

    Returns:
        None.  An unreadable image, an unusable output directory, or a
        failing/missing ``fls`` command is reported with ``print`` and the
        function returns early.
    """

    # check that we can open image
    try:
        with open(imgpath, 'rb'):
            pass
    except (IOError, OSError):
        print('Unable to open %s. Check that the path is '
              'correct, and that you have read permission.' % imgpath)
        return

    # if the output directory exists, check that it's writeable
    if os.path.isdir(outpath):
        if not os.access(outpath, os.W_OK):
            print('Output directory %s is not writeable - check permissions'
                  % outpath)
            return
    # otherwise create it
    else:
        try:
            os.makedirs(outpath)
        except OSError:
            # os.makedirs reports failures as OSError
            print('Could not create output directory %s - check permissions'
                  % outpath)
            return

    cmd = ['fls', '-i', 'raw', '-p', '-r', imgpath]

    try:
        p = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as exc:
        # raised when the fls executable cannot be started (e.g. not installed)
        print('Command "%s" failed:\n%s' % (' '.join(cmd), exc))
        return
    out, err = p.communicate()

    # The pipes deliver bytes; the regex below is a text pattern, so decode.
    out = out.decode('utf-8', 'replace')
    err = err.decode('utf-8', 'replace')

    if p.returncode:
        print('Command "%s" failed:\n%s' % (' '.join(cmd), err))
        return

    # Groups: name type, metadata type, inode number, path.  The literal '*'
    # restricts matches to the listing lines carrying that marker.
    ft = ''.join(TYPECODES)
    regex = r'([%s])/([%s])\s+\*\s+(\d+):\s+(.*)' % (ft, ft)

    # Absolute output root with a trailing separator, for containment checks.
    outroot = os.path.join(os.path.abspath(outpath), '')

    success = {}
    failure = {}
    skipped = {}

    for ftype, mtype, inode, relpath in re.findall(regex, out):

        recpath = os.path.join(outpath, relpath)
        recdir = os.path.dirname(recpath)
        item = {relpath: [imgpath, relpath]}

        # don't try to recover directories
        if os.path.isdir(recpath):
            continue

        # Paths come from the image itself; refuse any that would be written
        # outside the output directory (absolute paths, '..' components).
        if not os.path.abspath(recpath).startswith(outroot):
            if verbose:
                print('[SKIPPED] %s:%s [unsafe path %s]'
                      % (imgpath, inode, relpath))
            skipped.update(item)
            continue

        # only worth recovering deleted files
        # NOTE(review): the fls documentation describes 'd' as a directory
        # code - confirm that dumping such entries is intended.
        if (ftype in ('r', 'd')) and (mtype in ('r', 'd')):
            if not os.path.isdir(recdir):
                # a plain file already sitting where the parent directory
                # must go is removed to make room for it
                if os.path.exists(recdir):
                    os.remove(recdir)
                os.makedirs(recdir)
            cmd = ['icat', '-i', 'raw', '-r', imgpath, inode]
            with open(recpath, 'wb', 4096) as outfile:
                try:
                    status = subprocess.call(
                        cmd, stdout=outfile, bufsize=4096)
                except OSError:
                    # icat could not be started; count the entry as failed
                    status = 1
            if status:
                msg = '[FAILED]'
                failure.update(item)
            else:
                msg = '[RECOVERED]'
                success.update(item)
            if verbose:
                if ftype != mtype:
                    realloc_msg = (
                        '[WARNING: file name structure (%s) '
                        'does not match metadata (%s)]'
                        % (TYPEDICT[ftype], TYPEDICT[mtype]))
                else:
                    realloc_msg = ''
                print('%s %s:%s --> %s %s'
                      % (msg, imgpath, inode, recpath, realloc_msg))
        else:
            # skip unknown/other file types
            if verbose:
                print('[SKIPPED] %s:%s [%s / %s]'
                      % (imgpath, inode, TYPEDICT[ftype], TYPEDICT[mtype]))
            skipped.update(item)

    print('-' * 50)
    nsuccesses = len(success)
    nfailures = len(failure)
    nskipped = len(skipped)
    print('%i files successfully recovered to %s'
          % (nsuccesses, outpath))
    print('%i files skipped' % nskipped)
    print('%i files could not be successfully recovered' % nfailures)
    if nfailures:
        print('\n'.join(' * ' + pth for pth in failure))
    print('-' * 50)

if __name__ == '__main__':
    # Script entry point: parse the command line and run the recovery.
    cli = parser.parse_args()
    # 'image' is stored as a one-element list, hence the [0].
    recover(cli.image[0], cli.output, verbose=cli.verbose)

只需将脚本复制粘贴到 PyCharm 中运行即可(需要先安装 SleuthKit,因为脚本会调用其中的 fls 和 icat 命令)。