在iOS 4.0中,Apple重新设计了备份过程。
iTunes过去将与备份文件关联的文件名列表存储在Manifest.plist文件中,但在iOS 4.0中,它已将此信息移至Manifest.mbdb。
您可以使用iOS 4.0设备进行一次备份,然后查看 ~/Library/Application Support/MobileSync/Backup 文件夹(查看日期最新的子文件夹),即可看到此文件的示例。
以下是文本编辑器中文件的截图:
alt text http://supercrazyawesome.com/images/mbdb.png
如何在Cocoa应用程序中解析此文件,以便我可以为iOS 4.0更新我的(免费)iPhone备份提取程序应用程序(http://supercrazyawesome.com)?
答案 0 :(得分:83)
谢谢你,user374559和reneD - 代码和描述非常有帮助。
我尝试用Python解析该文件,并以Unix ls -l格式打印出信息:
#!/usr/bin/env python
import sys
def getint(data, offset, intsize):
    """Read a big-endian unsigned integer of intsize bytes from data.

    Reading starts at offset. Returns a (value, new_offset) tuple where
    new_offset is the position just past the bytes that were consumed.
    """
    stop = offset + max(intsize, 0)
    result = 0
    for pos in range(offset, stop):
        # Shift what we have up by one byte and append the next byte.
        result = (result << 8) + ord(data[pos])
    return result, stop
def getstring(data, offset):
    """Read a length-prefixed string from data, starting at offset.

    Two 0xFF bytes mark a blank string. Otherwise the first two bytes are
    a big-endian length, followed by that many bytes of content.
    Returns a (string, new_offset) tuple.
    """
    marker = chr(0xFF)
    is_blank = data[offset] == marker and data[offset + 1] == marker
    if not is_blank:
        size, start = getint(data, offset, 2)  # 2-byte length
        end = start + size
        return data[start:end], end
    return '', offset + 2  # Blank string
def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file.

    Returns a dict that maps the offset at which each record starts in the
    file to a dict describing that record (domain, filename, mode, sizes,
    timestamps, extra properties, ...).

    Raises Exception if the data does not start with the "mbdb" magic.
    """
    # The manifest is binary data: read it in binary mode so no newline
    # translation or early end-of-file handling is applied, and close the
    # handle as soon as the content is in memory.
    with open(filename, 'rb') as infile:
        data = infile.read()
    if data[0:4] != "mbdb":
        raise Exception("This does not look like an MBDB file")

    # Record layout: five strings, then fixed-width big-endian integers.
    string_fields = ('domain', 'filename', 'linktarget', 'datahash',
                     'unknown1')
    int_fields = (('mode', 2), ('unknown2', 4), ('unknown3', 4),
                  ('userid', 4), ('groupid', 4), ('mtime', 4), ('atime', 4),
                  ('ctime', 4), ('filelen', 8), ('flag', 1), ('numprops', 1))

    mbdb = {}  # Map offset of info in this file => file info
    offset = 4 + 2  # skip the magic, then value x05 x00 (meaning unknown)
    while offset < len(data):
        fileinfo = {'start_offset': offset}
        for key in string_fields:
            fileinfo[key], offset = getstring(data, offset)
        for key, size in int_fields:
            fileinfo[key], offset = getint(data, offset, size)
        # numprops name/value string pairs follow the fixed fields.
        properties = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[propname] = propval
        fileinfo['properties'] = properties
        mbdb[fileinfo['start_offset']] = fileinfo
    return mbdb
def process_mbdx_file(filename):
    """Parse a Manifest.mbdx index file.

    Returns a dict that maps each record's offset in the MBDB file (already
    adjusted past the 6-byte MBDB prolog) to the hex string of the record's
    fileID.

    Raises Exception if the data does not start with the "mbdx" magic.
    """
    # Binary data: read in binary mode and close the handle promptly.
    with open(filename, 'rb') as infile:
        data = infile.read()
    if data[0:4] != "mbdx":
        raise Exception("This does not look like an MBDX file")

    mbdx = {}  # Map offset of info in the MBDB file => fileID string
    offset = 4 + 2  # skip the magic, then value 0x02 0x00 (meaning unknown)
    # 4-byte count of records; only skipped, the loop runs to end of data.
    _count, offset = getint(data, offset, 4)
    while offset < len(data):
        # 26 byte record: 20-byte fileID, 4-byte offset, 2-byte mode.
        fileID = data[offset:offset + 20]
        fileID_string = ''.join(['%02x' % ord(b) for b in fileID])
        offset = offset + 20
        mbdb_offset, offset = getint(data, offset, 4)
        mbdb_offset = mbdb_offset + 6  # Add 6 to get past prolog
        _mode, offset = getint(data, offset, 2)  # mode field is not used
        mbdx[mbdb_offset] = fileID_string
    return mbdx
def modestr(val):
    """Render the low nine bits of val as an ls-style permission string.

    Each group of three bits becomes 'r', 'w', 'x' or '-' per bit; for
    example the octal value 755 becomes 'rwxr-xr-x'.
    """
    flags = ((0x4, 'r'), (0x2, 'w'), (0x1, 'x'))
    chars = []
    for shift in (6, 3, 0):
        triplet = val >> shift
        for bit, letter in flags:
            chars.append(letter if triplet & bit else '-')
    return ''.join(chars)
def fileinfo_str(f, verbose=False):
    """Format one file record as a line of text.

    With verbose false the result is "(fileID)domain::filename". With
    verbose true the result is an ls -l style line: type character,
    permission string, user and group ids in hex, length, the three
    timestamps, the short form, the symlink target for symlinks and any
    extra properties. An unrecognised file type is reported on stderr and
    shown as '?'.
    """
    if not verbose:
        return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])

    known_types = {0xA000: 'l', 0x8000: '-', 0x4000: 'd'}  # symlink, file, dir
    file_type = known_types.get(f['mode'] & 0xE000)
    if file_type is None:
        sys.stderr.write("Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False)) + "\n")
        file_type = '?'  # unknown

    fields = (file_type, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'],
              f['filelen'], f['mtime'], f['atime'], f['ctime'],
              f['fileID'], f['domain'], f['filename'])
    pieces = ["%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % fields]
    if file_type == 'l':
        pieces.append(' -> ' + f['linktarget'])  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        pieces.append(' ' + name + '=' + repr(value))
    return ''.join(pieces)
verbose = True
if __name__ == '__main__':
    # Parse both manifest files from the current directory.
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    for offset, fileinfo in mbdb.items():
        # Attach the fileID found in the MBDX index; warn when there is none.
        fileinfo['fileID'] = mbdx.get(offset, "<nofileID>")
        if offset not in mbdx:
            sys.stderr.write("No fileID found for %s" % fileinfo_str(fileinfo) + "\n")
        print(fileinfo_str(fileinfo, verbose))
答案 1 :(得分:29)
在iOS 5中,删除了Manifest.mbdx文件。出于本文的目的,它无论如何都是多余的,因为域和路径在Manifest.mbdb中,并且可以使用SHA1生成ID哈希。
这是我对galloglass代码的更新,因此它适用于iOS 5设备的备份。唯一的变化是删除process_mbdx_file()并在process_mbdb_file()中添加几行。
使用iPhone 4S和iPad 1进行测试,两者都有大量应用和文件。
#!/usr/bin/env python
import sys
import hashlib
# Maps each record's start offset in the MBDB file to its SHA1 file ID;
# filled in as a side effect of process_mbdb_file().
mbdx = {}
def getint(data, offset, intsize):
    """Decode intsize bytes of data at offset as a big-endian integer.

    Returns a (value, new_offset) tuple; new_offset is the position that
    follows the last byte read.
    """
    total = 0
    position = offset
    remaining = intsize
    while remaining > 0:
        # Each new byte becomes the least significant byte of the total.
        total = (total << 8) + ord(data[position])
        position += 1
        remaining -= 1
    return total, position
def getstring(data, offset):
    """Extract a length-prefixed string from data at offset.

    A pair of 0xFF bytes stands for a blank string; any other prefix is a
    2-byte big-endian length followed by the string content.
    Returns a (string, new_offset) tuple.
    """
    blank = chr(0xFF)
    if data[offset] == blank and data[offset + 1] == blank:
        return '', offset + 2  # Blank string
    length, body_start = getint(data, offset, 2)  # 2-byte length
    body_end = body_start + length
    return data[body_start:body_end], body_end
def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file (iOS 5 layout, no Manifest.mbdx).

    Returns a dict that maps the offset at which each record starts in the
    file to a dict describing that record.

    As a side effect, stores in the module-level mbdx dict, under the same
    offset, the SHA1 hex digest of "<domain>-<filename>", which serves as
    the record's file ID.

    Raises Exception if the data does not start with the "mbdb" magic.
    """
    # The manifest is binary data: read it in binary mode so no newline
    # translation or early end-of-file handling is applied, and close the
    # handle as soon as the content is in memory.
    with open(filename, 'rb') as infile:
        data = infile.read()
    if data[0:4] != "mbdb":
        raise Exception("This does not look like an MBDB file")

    # Record layout: five strings, then fixed-width big-endian integers.
    string_fields = ('domain', 'filename', 'linktarget', 'datahash',
                     'unknown1')
    int_fields = (('mode', 2), ('unknown2', 4), ('unknown3', 4),
                  ('userid', 4), ('groupid', 4), ('mtime', 4), ('atime', 4),
                  ('ctime', 4), ('filelen', 8), ('flag', 1), ('numprops', 1))

    mbdb = {}  # Map offset of info in this file => file info
    offset = 4 + 2  # skip the magic, then value x05 x00 (meaning unknown)
    while offset < len(data):
        fileinfo = {'start_offset': offset}
        for key in string_fields:
            fileinfo[key], offset = getstring(data, offset)
        for key, size in int_fields:
            fileinfo[key], offset = getint(data, offset, size)
        # numprops name/value string pairs follow the fixed fields.
        properties = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[propname] = propval
        fileinfo['properties'] = properties
        mbdb[fileinfo['start_offset']] = fileinfo
        # The file ID is the SHA1 of "<domain>-<path>".
        fullpath = fileinfo['domain'] + '-' + fileinfo['filename']
        mbdx[fileinfo['start_offset']] = hashlib.sha1(fullpath).hexdigest()
    return mbdb
def modestr(val):
    """Return the ls-style permission string for the low nine bits of val.

    Every three-bit group selects one of eight 'rwx' patterns, so the
    octal value 640 gives 'rw-r-----'.
    """
    # Index i holds the pattern for the three-bit value i.
    triplets = ('---', '--x', '-w-', '-wx', 'r--', 'r-x', 'rw-', 'rwx')
    first = triplets[(val >> 6) & 0x7]
    second = triplets[(val >> 3) & 0x7]
    third = triplets[val & 0x7]
    return first + second + third
def fileinfo_str(f, verbose=False):
    """Describe one file record as a single line of text.

    The short form, used when verbose is false, is
    "(fileID)domain::filename". The verbose form is an ls -l style line
    ending with the short form, followed by the link target for symlinks
    and by every extra property. A file type that is not symlink, file or
    directory is reported on stderr and rendered as '?'.
    """
    if not verbose:
        return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])

    type_bits = f['mode'] & 0xE000
    if type_bits == 0xA000:
        kind = 'l'  # symlink
    elif type_bits == 0x8000:
        kind = '-'  # file
    elif type_bits == 0x4000:
        kind = 'd'  # dir
    else:
        warning = "Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False))
        sys.stderr.write(warning + "\n")
        kind = '?'  # unknown

    line = "%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % (
        kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'],
        f['filelen'], f['mtime'], f['atime'], f['ctime'],
        f['fileID'], f['domain'], f['filename'])
    if kind == 'l':
        line += ' -> ' + f['linktarget']  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        line += ' ' + name + '=' + repr(value)
    return line
verbose = True
if __name__ == '__main__':
    # Parsing also fills the module-level mbdx dict with the file IDs.
    mbdb = process_mbdb_file("Manifest.mbdb")
    for offset, fileinfo in mbdb.items():
        found = offset in mbdx
        fileinfo['fileID'] = mbdx[offset] if found else "<nofileID>"
        if not found:
            sys.stderr.write("No fileID found for %s" % fileinfo_str(fileinfo) + "\n")
        print(fileinfo_str(fileinfo, verbose))
答案 2 :(得分:18)
我完成了这方面的工作 - 即用于Python的我的备份解码器库的iOS 4 + iTunes 9.2更新 - http://www.iki.fi/fingon/iphonebackupdb.py
它能完成我需要的功能,文档很少,但欢迎随意借鉴其中的思路 ;-)
(至少我的备份似乎能正常工作。)
答案 3 :(得分:10)
您可以在此处找到有关MBDB / MBDX格式的信息和一些描述:
http://code.google.com/p/iphonebackupbrowser/
这是我浏览备份文件的应用程序。我试图记录iTunes 9.2附带的新文件的格式。
答案 4 :(得分:7)
这个python脚本真棒。
这是我的Ruby版本(略有改进),并增加了搜索功能(适用于iOS 5)。
# encoding: utf-8
require 'fileutils'
require 'digest/sha1'
# Parser for the Manifest.mbdb file of an iTunes backup. The file ID of each
# record is computed as the SHA1 of "<domain>-<filename>".
class ManifestParser
  # File type characters keyed by the masked type bits of the mode field.
  FILE_TYPES = { 0xA000 => 'l', 0x8000 => '-', 0x4000 => 'd' }.freeze

  # String fields at the start of every record, in file order.
  STRING_FIELDS = [:domain, :filename, :linktarget, :datahash, :unknown1].freeze

  # Integer fields that follow the mode field, with their size in bytes.
  INT_FIELDS = [
    [:unknown2, 4], [:unknown3, 4], [:userid, 4], [:groupid, 4],
    [:mtime, 4], [:atime, 4], [:ctime, 4], [:filelen, 8],
    [:flag, 1], [:numprops, 1]
  ].freeze

  # Parses the given Manifest.mbdb file immediately.
  # * mbdb_filename Path of the file to parse.
  # * verbose When true, records are rendered in an ls -l like format.
  def initialize(mbdb_filename, verbose = false)
    @verbose = verbose
    process_mbdb_file(mbdb_filename)
  end

  # Returns the number of records in the Manifest file.
  def record_number
    @mbdb.size
  end

  # Returns a huge string containing one line per parsed record.
  def to_s
    @mbdb.map { |v| "#{fileinfo_str(v)}\n" }.join
  end

  # Writes one line per parsed record to the given file.
  def to_file(filename)
    File.open(filename, 'w') do |f|
      @mbdb.each { |v| f.puts fileinfo_str(v) }
    end
  end

  # Copy the backup files to their real path/name.
  # * domain_match Can be a regexp to restrict the files to copy.
  # * filename_match Can be a regexp to restrict the files to copy.
  def rename_files(domain_match = nil, filename_match = nil)
    @mbdb.each do |v|
      next unless v[:type] == '-' # Only copy regular files.
      next unless domain_match.nil? || v[:domain] =~ domain_match
      next unless filename_match.nil? || v[:filename] =~ filename_match

      dst = "#{v[:domain]}/#{v[:filename]}"
      puts "Creating: #{dst}"
      FileUtils.mkdir_p(File.dirname(dst))
      FileUtils.cp(v[:fileID], dst)
    end
  end

  # Returns the records whose "domain::filename" matches the given regexp.
  def search(regexp)
    @mbdb.select { |v| "#{v[:domain]}::#{v[:filename]}" =~ regexp }
  end

  private

  # Retrieve an integer (big-endian) and new offset from the current offset
  def getint(data, offset, intsize)
    value = 0
    intsize.times do
      value = (value << 8) + data[offset].ord
      offset += 1
    end
    return value, offset
  end

  # Retrieve a string and new offset from the current offset into the data
  def getstring(data, offset)
    # Two 0xFF bytes mark a blank string.
    return '', offset + 2 if data[offset] == 0xFF.chr && data[offset + 1] == 0xFF.chr

    length, offset = getint(data, offset, 2) # 2-byte length
    value = data[offset...(offset + length)]
    return value, (offset + length)
  end

  # Reads the whole file and fills @mbdb with one hash per record.
  # Raises if the data does not start with the 'mbdb' magic.
  def process_mbdb_file(filename)
    @mbdb = []
    data = File.open(filename, 'rb') { |f| f.read }
    puts "MBDB file read. Size: #{data.size}"
    raise 'This does not look like an MBDB file' if data[0...4] != 'mbdb'

    offset = 4
    offset += 2 # value x05 x00, not sure what this is
    while offset < data.size
      fileinfo = {}
      fileinfo[:start_offset] = offset
      STRING_FIELDS.each do |key|
        fileinfo[key], offset = getstring(data, offset)
      end
      fileinfo[:mode], offset = getint(data, offset, 2)
      # Symlink, file or dir; anything else is recorded as '?'.
      fileinfo[:type] = FILE_TYPES.fetch(fileinfo[:mode] & 0xE000, '?')
      INT_FIELDS.each do |key, size|
        fileinfo[key], offset = getint(data, offset, size)
      end
      fileinfo[:properties] = {}
      fileinfo[:numprops].times do
        propname, offset = getstring(data, offset)
        propval, offset = getstring(data, offset)
        fileinfo[:properties][propname] = propval
      end
      # Compute the ID of the file.
      fullpath = fileinfo[:domain] + '-' + fileinfo[:filename]
      fileinfo[:fileID] = Digest::SHA1.hexdigest(fullpath)
      # We add the file to the list of files.
      @mbdb << fileinfo
    end
    @mbdb
  end

  # Returns the ls-style permission string ('rwxr-xr-x') for the low nine
  # bits of val.
  def modestr(val)
    [6, 3, 0].map do |shift|
      bits = val >> shift
      # The masked value must be compared with zero explicitly: 0 is truthy
      # in Ruby, so using it directly as a condition always reports the bit
      # as set.
      [[0x4, 'r'], [0x2, 'w'], [0x1, 'x']].map do |mask, letter|
        (bits & mask).zero? ? '-' : letter
      end.join
    end.join
  end

  # Formats one record, either as "(fileID)domain::filename" or, when
  # verbose, as an ls -l like line with link target and extra properties.
  def fileinfo_str(f)
    return "(#{f[:fileID]})#{f[:domain]}::#{f[:filename]}" unless @verbose

    data = [f[:type], modestr(f[:mode]), f[:userid], f[:groupid], f[:filelen], f[:mtime], f[:atime], f[:ctime], f[:fileID], f[:domain], f[:filename]]
    info = "%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % data
    info += ' -> ' + f[:linktarget] if f[:type] == 'l' # Symlink destination
    f[:properties].each do |k, v|
      info += " #{k}=#{v.inspect}"
    end
    info
  end
end
# Script entry point: parse Manifest.mbdb in verbose mode and write the
# listing to filenames.txt.
if $PROGRAM_NAME == __FILE__
  parser = ManifestParser.new('Manifest.mbdb', true)
  parser.to_file('filenames.txt')
end
答案 5 :(得分:4)
我喜欢galloglass的代码,我修改了main部分,使其按应用程序显示总大小的排序列表:
verbose = True
if __name__ == '__main__':
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    sizes = {}  # Map domain => total length of its regular files
    for offset, fileinfo in mbdb.items():
        # Attach the fileID found in the MBDX index; warn when there is none.
        fileinfo['fileID'] = mbdx.get(offset, "<nofileID>")
        if offset not in mbdx:
            sys.stderr.write("No fileID found for %s" % fileinfo_str(fileinfo) + "\n")
        print(fileinfo_str(fileinfo, verbose))
        # Only regular files contribute to the per-domain total.
        if (fileinfo['mode'] & 0xE000) == 0x8000:
            owner = fileinfo['domain']
            sizes[owner] = sizes.get(owner, 0) + fileinfo['filelen']
    # List the domains from smallest to largest total size.
    for domain in sorted(sizes, key=sizes.get):
        total = sizes[domain]
        print("%-60s %11d (%dMB)" % (domain, total, int(total/1024/1024)))
通过这种方式,您可以找出是哪个应用程序占用了所有空间。
答案 6 :(得分:2)
对于那些寻找MBDB文件阅读器的Java实现的人来说,有几个:
答案 7 :(得分:0)
感谢galloglass的回答。该代码适用于Python 2.7。我只想提一件事:读取Manifest.mbdb文件时,应使用二进制模式,否则无法读取全部内容。
我还做了一些小的改动,使代码适用于Python 3.4。这是代码。
#!/usr/bin/env python
import sys
import hashlib
# Maps each record's start offset in the MBDB file to its SHA1 file ID;
# filled in as a side effect of process_mbdb_file().
mbdx = {}
def getint(data, offset, intsize):
    """Read a big-endian unsigned integer of intsize bytes from data.

    data is indexed byte by byte, each index yielding an int. Returns a
    (value, new_offset) tuple where new_offset is the position just past
    the bytes that were consumed.
    """
    stop = offset + max(intsize, 0)
    result = 0
    for pos in range(offset, stop):
        # Shift what we have up by one byte and append the next byte.
        result = (result << 8) + data[pos]
    return result, stop
def getstring(data, offset):
    """Read a length-prefixed string from the bytes in data at offset.

    Two 0xFF bytes mark a blank string. Otherwise the first two bytes are
    a big-endian length, followed by that many bytes of content, which are
    decoded as latin-1. Returns a (string, new_offset) tuple.
    """
    if data[offset] == 0xFF and data[offset + 1] == 0xFF:
        return '', offset + 2  # Blank string
    size, start = getint(data, offset, 2)  # 2-byte length
    end = start + size
    raw = data[start:end]
    return raw.decode(encoding='latin-1'), end
def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file (iOS 5 layout, no Manifest.mbdx).

    Returns a dict that maps the offset at which each record starts in the
    file to a dict describing that record.

    As a side effect, stores in the module-level mbdx dict, under the same
    offset, the SHA1 hex digest of "<domain>-<filename>", which serves as
    the record's file ID.

    Raises Exception if the data does not start with the "mbdb" magic.
    """
    # 'b' is needed to read all content; the handle is closed right after.
    with open(filename, 'rb') as infile:
        data = infile.read()
    # Compare raw bytes: decoding an arbitrary header could itself fail
    # with UnicodeDecodeError before the intended error is raised.
    if data[0:4] != b"mbdb":
        raise Exception("This does not look like an MBDB file")

    # Record layout: five strings, then fixed-width big-endian integers.
    string_fields = ('domain', 'filename', 'linktarget', 'datahash',
                     'unknown1')
    int_fields = (('mode', 2), ('unknown2', 4), ('unknown3', 4),
                  ('userid', 4), ('groupid', 4), ('mtime', 4), ('atime', 4),
                  ('ctime', 4), ('filelen', 8), ('flag', 1), ('numprops', 1))

    mbdb = {}  # Map offset of info in this file => file info
    offset = 4 + 2  # skip the magic, then value x05 x00 (meaning unknown)
    while offset < len(data):
        fileinfo = {'start_offset': offset}
        for key in string_fields:
            fileinfo[key], offset = getstring(data, offset)
        for key, size in int_fields:
            fileinfo[key], offset = getint(data, offset, size)
        # numprops name/value string pairs follow the fixed fields.
        properties = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[propname] = propval
        fileinfo['properties'] = properties
        mbdb[fileinfo['start_offset']] = fileinfo
        fullpath = fileinfo['domain'] + '-' + fileinfo['filename']
        # getstring() decodes as latin-1, so encoding as latin-1 restores
        # the exact bytes stored in the file; the default UTF-8 encoding
        # would hash different bytes for any non-ASCII path.
        digest = hashlib.sha1(fullpath.encode('latin-1'))
        mbdx[fileinfo['start_offset']] = digest.hexdigest()
    return mbdb
def modestr(val):
    """Render the low nine bits of val as an ls-style permission string.

    Each bit that is set shows as its letter ('r', 'w' or 'x') and each
    clear bit as '-', so the octal value 755 becomes 'rwxr-xr-x'.
    """
    flags = ((0x4, 'r'), (0x2, 'w'), (0x1, 'x'))
    return ''.join(
        letter if (val >> shift) & bit else '-'
        for shift in (6, 3, 0)
        for bit, letter in flags
    )
def fileinfo_str(f, verbose=False):
    """Format one file record as a line of text.

    With verbose false the result is "(fileID)domain::filename". With
    verbose true the result is an ls -l style line: type character,
    permission string, user and group ids in hex, length, the three
    timestamps, the short form, the symlink target for symlinks and any
    extra properties. An unrecognised file type is reported on stderr and
    shown as '?'.
    """
    if not verbose:
        return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])

    known_types = {0xA000: 'l', 0x8000: '-', 0x4000: 'd'}  # symlink, file, dir
    file_type = known_types.get(f['mode'] & 0xE000)
    if file_type is None:
        # Python 3 print function; the Python 2 "print >> stream" statement
        # raises TypeError here.
        print("Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False)),
              file=sys.stderr)
        file_type = '?'  # unknown

    fields = (file_type, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'],
              f['filelen'], f['mtime'], f['atime'], f['ctime'],
              f['fileID'], f['domain'], f['filename'])
    pieces = ["%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % fields]
    if file_type == 'l':
        pieces.append(' -> ' + f['linktarget'])  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        pieces.append(' ' + name + '=' + repr(value))
    return ''.join(pieces)
verbose = True
if __name__ == '__main__':
    # Parsing also fills the module-level mbdx dict with the file IDs.
    mbdb = process_mbdb_file(r"Manifest.mbdb")
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            # Python 3 print function; the Python 2 "print >> stream"
            # statement raises TypeError here.
            print("No fileID found for %s" % fileinfo_str(fileinfo),
                  file=sys.stderr)
        print(fileinfo_str(fileinfo, verbose))