这是我的第一个问题,如果在代码示例方面有点长,我会道歉。
作为工作申请的一部分,我被要求编写一个暴露了一些字段的Bit Torrent文件解析器。我做了代码,并被告知我的代码“不是我们从团队领导要求的水平”。哎哟!
这很好,自编码以来已有好几年,列表推导,当天不存在生成器(我从COBOL开始,但用C,C ++等编写)。对我来说,下面的代码非常干净。有时没有需要使用更复杂的结构,语法或模式 - “保持简单”。
我可以请一些Python专家批评这段代码吗?我相信其他人可以看到代码可以改进的地方。还有更多评论等(bencode.py来自http://wiki.theory.org/Decoding_bencoded_data_with_python)
我能想到的领域:
我个人为这段代码感到自豪,所以想知道我需要改进的地方。感谢。
#!/usr/bin/env python2
"""Bit Torrent Parsing
Parses a Bit Torrent file.
A basic parser for Bit Torrent files. Visit http://wiki.theory.org/BitTorrentSpecification for the BitTorrent specification.
"""
__author__ = "...."
__version__ = "$Revision: 1.0 $"
__date__ = "$Date: 2012/10/26 11:08:46 $"
__copyright__ = "Enjoy & Distribute"
__license__ = "Python"
import bencode
import argparse
from argparse import RawTextHelpFormatter
import binascii
import time
import os
import pprint
torrent_files = 0
torrent_pieces = 0
def display_root(filename, root):
"""prints main (root) information on torrent"""
global torrent_files
global torrent_pieces
print
print "Bit Torrent Metafile Structure root nodes:"
print "------------------------------------------"
print "Torrent filename: ", filename
print " Info: %d file(s), %d pieces, ~%d kb/pc" % (
torrent_files,
torrent_pieces,
root['info']['piece length'] / 1024)
if 'private' in root['info']:
if root['info']['private'] == 1:
print " Publish presence: Private"
print " Announce: ", root['announce']
if 'announce-list' in root:
print " Announce List: "
for i in root['announce-list']:
print " ", i[0]
if 'creation date' in root:
print " Creation Date: ", time.ctime(root['creation date'])
if 'comment' in root:
print " Comment: ", root['comment']
if 'created-by' in root:
print " Created-By: ", root['created-by']
print " Encoding: ", root['encoding']
print
def display_torrent_file(info):
"""prints file information (single or multifile)"""
global torrent_files
global torrent_pieces
if 'files' in info:
# multipart file mode
# directory, followed by filenames
print "Files:"
max_files = args.maxfiles
display = max_files if (max_files < torrent_files) else torrent_files
print " %d File %d shown: " % (torrent_files, display)
print " Directory: ", info['name']
print " Filenames:"
i = 0
for files in info['files']:
if i < max_files:
prefix = ''
if len(files['path']) > 1:
prefix = './'
filename = prefix + '/'.join(files['path'])
if args.filehash:
if 'md5sum' in files:
md5hash = binascii.hexlify(files['md5sum'])
else:
md5hash = 'n/a'
print ' %s [hash: %s]' % (filename, md5hash)
else:
print ' %s ' % filename
i += 1
else:
break
else:
# single file mode
print "Filename: ", info['name']
print
def display_pieces(pieceDict):
"""prints SHA1 hash for pieces, limited by arg pieces"""
global torrent_files
global torrent_pieces
# global pieceDict
# limit since a torrent file can have 1,000's of pieces
max_pieces = args.pieces if args.pieces else 10
print "Pieces:"
print " Torrent contains %s pieces, %d shown."% (
torrent_pieces, max_pieces)
print " piece : sha1"
i = 0
while i < max_pieces and i < torrent_pieces:
# print SHA1 hash in readable hex format
print ' %5d : %s' % (i+1, binascii.hexlify(pieceDict[i]))
i += 1
def parse_pieces(root):
"""create dictionary [ piece-num, hash ] from info's pieces
Returns the pieces dictionary. key is the piece number, value is the
SHA1 hash value (20-bytes)
Keyword arguments:
root -- a Bit Torrent Metafile root dictionary
"""
global torrent_pieces
pieceDict = {}
i = 0
while i < torrent_pieces:
pieceDict[i] = root['info']['pieces'][(20*i):(20*i)+20]
i += 1
return pieceDict
def parse_root_str(root_str):
"""create dictionary [ piece-num, hash ] from info's pieces
Returns the complete Bit Torrent Metafile Structure dictionary with
relevant Bit Torrent Metafile nodes and their values.
Keyword arguments:
root_str -- a UTF-8 encoded string with root-level nodes (e.g., info)
"""
global torrent_files
global torrent_pieces
try:
torrent_root = bencode.decode(root_str)
except StandardError:
print 'Error in torrent file, likely missing separators like ":"'
if 'files' in torrent_root['info']:
torrent_files = len(torrent_root['info']['files'])
else:
torrent_files = 1
torrent_pieces = len(torrent_root['info']['pieces']) / 20
torrent_piece = parse_pieces(torrent_root)
return torrent_root, torrent_piece
def readfile(filename):
"""read file and return file's data"""
global torrent_files
global torrent_pieces
if os.path.exists(filename):
with open(filename, mode='rb') as f:
filedata = f.read()
else:
print "Error: filename: '%s' does not exist." % filename
raise IOError("Filename not found.")
return filedata
if __name__ == "__main__":
parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter,
description=
"A basic parser for Bit Torrent files. Visit "
"http://wiki.theory.org/BitTorrentSpecification for "
"the BitTorrent specification.",
epilog=
"The keys for the Bit Torrent MetaInfo File Structure "
"are info, announce, announce-list, creation date, comment, "
"created by and encoding. \n"
"The Info Dictionary (info) is dependant on whether the torrent "
"file is a single or multiple file. The keys common to both "
"are piece length, pieces and private.\nFor single files, the "
"additional keys are name, length and md5sum.For multiple files "
"the keys are, name and files. files is also a dictionary with "
"keys length, md5sum and path.\n\n"
"Examples:\n"
"torrentparse.py --string 'l4:dir14:dir28:file.exte'\n"
"torrentparse.py --filename foo.torrent\n"
"torrentparse.py -f foo.torrent -f bar.torrent "
"--maxfiles 2 --filehash --pieces 2 -v")
filegroup = parser.add_argument_group('Input File or String')
filegroup.add_argument("-f", "--filename",
help="name of torrent file to parse",
action='append')
filegroup.add_argument("-fh", "--filehash",
help="display file's MD5 hash",
action = "store_true")
filegroup.add_argument("-maxf", "--maxfiles",
help="display X filenames (default=20)",
metavar = 'X',
type=int, default=20)
piecegroup = parser.add_argument_group('Torrent Pieces')
piecegroup.add_argument("-p", "--pieces",
help = "display X piece's SHA1 hash (default=10)",
metavar = 'X',
type = int)
parser.add_argument("-s", "--string",
help="string for bencoded dictionary item")
parser.add_argument("-v", "--verbose",
help = "Display MetaInfo file to stdout",
action = "store_true")
args = parser.parse_args()
if args.string:
print
text = bencode.decode(args.string)
print text
else:
for fn in args.filename:
try:
filedata = readfile(fn)
torrent_root, torrent_piece = parse_root_str(filedata)
except IOError:
print "Please enter a valid filename"
raise
if torrent_root:
display_root(fn, torrent_root)
display_torrent_file(torrent_root['info'])
if args.pieces:
display_pieces(torrent_piece)
verbose = True if args.verbose else False
if verbose:
print
print "Verbose Mode: \nPrinting root and info dictionaries"
# remove pieces as its long. display it afterwards
pieceless_root = torrent_root
del pieceless_root['info']['pieces']
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(pieceless_root)
print
print "Print info's piece information: "
pp.pprint(torrent_piece)
print
print "\n"
答案 0 :(得分:4)
以下代码段:
i = 0
while i < torrent_pieces:
pieceDict[i] = root['info']['pieces'][(20*i):(20*i)+20]
i += 1
应替换为:
for i in range(torrent_pieces):
pieceDict[i] = root['info']['pieces'][(20*i):(20*i)+20]
这可能是他们想要看到的那种东西。通常,Python代码不应该在for
循环中非常需要显式的索引变量操作。
答案 1 :(得分:3)
我注意到的第一件事是你有很多全局变量。那不好;对于一个问题,您的代码不再是线程安全的。 (我现在看到你在你的问题中注意到了,但那是应该改变的。)
这看起来有点奇怪:
i = 0
for files in info['files']:
if i < max_files:
# ...
else:
break
相反,你可以这样做:
for file in info['files'][:max_files]:
# ...
我还注意到你解析文件足以输出所有漂亮打印的数据。您可能希望将其放入适当的结构中。例如,有Torrent
,Piece
和File
类。