这是我第一次尝试使用 Python,我写了一些解析代码,它能按照我的预期运行。
我想把这段代码分享给其他人(因为它实现的功能可能对他们有用),但说实话,我为自己笨拙而冗长的代码感到羞愧......真正干活的代码很短,但文件中大约 70% 的篇幅似乎都花在了声明和重复声明变量上....
我很确定这不是正确的做法。我并不是要求别人逐行审查我的代码,但如果能就如何整理这团乱麻给出一些基本的指点,那就太好了。
是的,待办事项(todo)之一就是在发布之前为各个部分添加注释....
import re, os
def setGlobals():
    """Initialise the module-level state shared by the parsing functions.

    Sets the flags and counters (``headerFlag``, ``lineCounter``,
    ``newLine``), gives ``sigVersionMatch`` an empty default, and compiles
    the regular expressions used on header and body lines.

    Only names that are assigned in this function are declared ``global``;
    reading a module-level name never needs the declaration.
    """
    ###################### Flags and counters
    global headerFlag, lineCounter, newLine
    ###################### header variables
    global headerLineOne, headerLineTwo, sigVersion, sigVersionMatch
    ###################### searchstring variables
    global filepath, status, puid, mime, status2, warning, filename
    global fileExtension

    ####################### Flag and counters settings
    headerFlag = 0
    lineCounter = 0
    newLine = ""
    # Default so that building an output row cannot hit an undefined name
    # when no header line carried a signature-file version.
    sigVersionMatch = ""

    ###################### search strings headers
    headerLineOne = re.compile(r'(DROID Version,)')
    headerLineTwo = re.compile(r'(Status,File,Warning,)')
    sigVersion = re.compile(r'(?<=SigFile Version,")[0-9]*')

    ###################### search strings body
    filepath = re.compile(r'(?P<filepath>(F:\\ExLib.*\w))')
    status = re.compile(r'(?P<status>(?<!,")(Positive|Not identified|Tentative))')
    puid = re.compile(r'(?P<puid>(x-fmt/|fmt/)([0-9]{1,3}))')
    mime = re.compile(r'(?P<mime>([a-zA-Z]*\/([a-zA-Z]|\-)*(?=",)))')
    status2 = re.compile(r'(?P<status2>(Positive \(Specific Format\)|Positive \(Generic Format\)|(Tentative)(?=(",""))))')
    warning = re.compile(r'(?P<warning>(Possible file extension mismatch))')
    filename = re.compile(r'(?P<filenam>(V[0-9]\-.*\w))')
    # Everything after the first dot of the matched file name.
    fileExtension = re.compile(r'(?<=\.).*')
def doSearches(line):
    """Run the body regexes against *line* and write a row when warranted.

    Each successful search stores its matched text in the corresponding
    module-level ``*Match`` variable. A status match first calls
    ``doInitialseVariables()`` so values from the previous record are
    cleared before the new ones are stored.

    ``doBuildLine(line)`` is called once when the current status is
    "Not identified" or when the line contains a PUID. Previously these
    were two independent checks, so a line satisfying both was written
    twice.
    """
    # Only the names assigned below need to be declared global; the
    # compiled patterns are merely read.
    global statusMatch, filenameMatch, fileExtensionMatch, filepathMatch
    global puidMatch, mimeMatch, status2Match, warningMatch

    ####### do searches
    status_hit = status.search(line)
    filename_hit = filename.search(line)
    filepath_hit = filepath.search(line)
    puid_hit = puid.search(line)
    mime_hit = mime.search(line)
    status2_hit = status2.search(line)
    warning_hit = warning.search(line)

    ###### store the matched text
    if status_hit:
        doInitialseVariables()
        statusMatch = status_hit.group()
    if filename_hit:
        filenameMatch = filename_hit.group()
        # The extension is looked for inside the file name just matched.
        extension_hit = fileExtension.search(filenameMatch)
        if extension_hit:
            fileExtensionMatch = extension_hit.group()
    if filepath_hit:
        filepathMatch = filepath_hit.group()
    if puid_hit:
        puidMatch = puid_hit.group()
    if mime_hit:
        mimeMatch = mime_hit.group()
    if status2_hit:
        status2Match = status2_hit.group()
    if warning_hit:
        warningMatch = warning_hit.group()

    # statusMatch is deliberately the stored value, not this line's hit:
    # it persists until the next status match resets it.
    if statusMatch == "Not identified" or puid_hit:
        doBuildLine(line)
def doBuildLine(line):
    """Append one output row for the current record to ``newfile``.

    Increments the global ``lineCounter``, prints it as a progress
    indicator, builds the comma-separated row from the module-level
    ``*Match`` values, stores it in the global ``newLine`` and appends it
    to the file named by the global ``newfile``.

    *line* is accepted for compatibility with existing callers but is not
    used; the row is built entirely from the stored match values.
    """
    # Only rebinding needs ``global``; the match values are just read.
    global lineCounter, newLine

    lineCounter += 1
    print(lineCounter)  # parenthesised form works on Python 2 and 3

    # Empty strings are the blank columns of the output layout.
    columns = [
        "3", str(sigVersionMatch), "slow", str(lineCounter), "", "",
        str(filepathMatch), str(filenameMatch), "",
        str(statusMatch), "", "",
        str(fileExtensionMatch), "",
        str(warningMatch), "", "",
        str(puidMatch), str(mimeMatch), "", "",
    ]
    newLine = ",".join(columns) + "\n"

    # The context manager closes the file even if the write fails.
    with open(newfile, "a") as outfile:
        outfile.write(newLine)
def doInitialseVariables():
    """Reset every per-record match value to the empty string.

    The original declared ``global filenamMatch`` (typo), so its
    ``filenameMatch = ""`` assignment only created a local variable and
    the module-level ``filenameMatch`` was never reset. The declaration
    now names the variable that is actually assigned.
    """
    global filepathMatch, statusMatch, puidMatch, mimeMatch
    global status2Match, warningMatch, filenameMatch, fileExtensionMatch

    puidMatch = ""
    mimeMatch = ""
    status2Match = ""
    warningMatch = ""
    statusMatch = ""
    filepathMatch = ""
    filenameMatch = ""
    fileExtensionMatch = ""
def doGetHeaderVariables(line):
    """Store the signature-file version found in *line*, if any.

    Searches *line* with the ``sigVersion`` pattern and, on a match, saves
    the matched text in the global ``sigVersionMatch``. When the pattern
    does not match, ``sigVersionMatch`` is left unchanged.

    The two header-line searches the original performed were never used
    and have been removed.
    """
    global sigVersionMatch
    version_hit = sigVersion.search(line)
    if version_hit:
        sigVersionMatch = version_hit.group()
def doStartProcessing(line):
    """Process one line of a log file.

    While the global ``headerFlag`` is still 0 the line is first passed to
    ``doGetHeaderVariables``; the flag is then set to 1 so that later
    lines skip the header check. Every line is passed to ``doSearches``.
    """
    global headerFlag

    header_pending = (headerFlag == 0)
    if header_pending:
        # Header extraction is attempted once, then switched off.
        doGetHeaderVariables(line)
        headerFlag = 1

    doSearches(line)
if __name__ == "__main__":
    # Entry point: clean every .csv log in ``directory`` and append the
    # result to a file of the same name under its "cleaned" subdirectory.
    setGlobals()
    doInitialseVariables()  # sets up the variable space to begin

    # Same path as before, with the backslashes escaped explicitly rather
    # than relying on "\d" and "\l" being unrecognised escape sequences.
    directory = "C:\\droid\\logs\\"
    extension = ".csv"
    cleaned_directory = directory + "cleaned\\"

    # Opening an output file fails if its directory is missing.
    if not os.path.isdir(cleaned_directory):
        os.makedirs(cleaned_directory)

    list_of_files = [name for name in os.listdir(directory)
                     if name.lower().endswith(extension)]

    for currentfile in list_of_files:
        logpath = directory + currentfile
        # ``newfile`` is module-level here, which is where doBuildLine
        # reads it from; no ``global`` statement is needed at this scope.
        newfile = cleaned_directory + currentfile
        with open(logpath, 'r') as logfile:
            for line in logfile:
                doStartProcessing(line)
        # Reset the per-file state before the next log is read.
        headerFlag = 0
        lineCounter = 0
答案 0 :(得分:6)
您只需要对打算在函数中重新赋值的名称使用 global 语句。
即使不使用该语句,您也可以访问任何想要的名称,包括通过它调用会修改该对象的方法。
如果您使用类(class),您的代码将大大简化。全局变量将成为对象的属性,您的函数将成为方法。
不要使用制表符缩进,请使用空格(如果您当初这样做了,上面的代码就能正常显示和阅读)。
在使用它们之前,您不需要编译正则表达式。 re
模块会自动为您编译和缓存。
答案 1 :(得分:1)
如果您还没有完成本教程:
如果您遇到问题,请在此处提出问题。
阅读PEP-8
答案 2 :(得分:1)
强烈推荐 Mark Lutz 的《Learning Python》,其中关于全局变量和局部变量、函数声明等用法的部分写得非常好。
我完全没有恶意,因为我知道自己早期也把这些东西搞得一团糟。但请把这段代码放进一个受密码保护的文件里,读完那本书,重写它,然后再向公众展示。永远不要让除了你自己以外的人拥有该文件的密码。它的主要用途是提醒你在极短的时间内取得了多大的进步。
话虽如此,能让这么多正则表达式为你正常工作,确实很不错。
答案 3 :(得分:1)
《How to Think Like a Computer Scientist》对初学者来说是一本很棒的书,它使用 Python 作为教学语言。它以清晰的方式讲解了类和对象,并提供了很好的示例。额外的好处:它是免费的,但不要因为免费就小看它。 :)