我是python的真正初学者,但我有以下脚本成功运行。它首先创建.xml文件列表,然后为每个.xml执行程序。
每个.xml需要2-4分钟才能完成,我需要运行数千个,所以我一直试图通过使用多处理加速我的脚本,但它似乎超出了我的技能。
非常感谢任何有关如何修改它的建议。
# import modules
import os, sys, shutil, subprocess, fnmatch
from datetime import datetime, timedelta
from time import strptime
# Set variables
# Folder whose *.xml project files are collected (top level only).
project_folder = r"T:\erin\indivs_sample"
# Program that is started once for each project file.
phoenix_exe_file = r'C:\Phoenix\Phoenix.exe'
# Create definitions
def runPhoenix(project_file):
    """Run Phoenix on one project file and wait until it exits.

    A start message and a finish message are printed around the run.

    Args:
        project_file: Path of the .xml project file, passed to the Phoenix
            executable as its only command-line argument.

    Returns:
        The exit code of the Phoenix process (previously it was discarded).
    """
    # NOTE(review): datetime.now() is local time although the message labels
    # it "GMT"; the text is left unchanged so existing log output still matches.
    stamp_format = "%a, %d %b %Y %H:%M:%S GMT"
    # A parenthesised single-argument print is valid on Python 2 and Python 3.
    print("Running Phoenix @: " + datetime.now().strftime(stamp_format) + " - " + project_file)
    # subprocess.call starts the program and waits for it (Popen + wait).
    return_code = subprocess.call([phoenix_exe_file, project_file])
    print("Phoenix Complete @: " + datetime.now().strftime(stamp_format))
    return return_code
# Create list of XMLs
# Only names directly inside project_folder are examined (os.listdir).
project_files = [
    os.path.join(project_folder, file_name)
    for file_name in os.listdir(project_folder)
    if fnmatch.fnmatch(file_name, '*.xml')
]
# run project files
# Each call blocks until Phoenix has finished, so the files run one by one.
for project_file in project_files:
    runPhoenix(project_file)
# A parenthesised single-argument print is valid on Python 2 and Python 3.
print("completed")
编辑1:我已经设法以我认为多处理需要的格式重新编写了我的代码......
# import modules
import os, sys, shutil, subprocess, fnmatch, time
from datetime import datetime, timedelta
from time import strptime
# Set variables
# Folder whose *.xml project files are collected (top level only).
project_folder = r"C:\TheHillsPilot\Phoenix\GeneralRuns\ProjectXMLs\indivs_sample"
# Program that is started once for each project file.
phoenix_exe_file = r'C:\Phoenix\Phoenix.exe'
# Create definitions
# Definition: print messages when using IDLE
def log(msg):
    """Print *msg* on standard output.

    Args:
        msg: The text to print.
    """
    # The bare "print msg" statement is a SyntaxError on Python 3; the
    # parenthesised single-argument form behaves the same on Python 2 and 3.
    print(msg)
# Definition: Create list of XMLs
def createlist():
    """Append the path of every *.xml entry of project_folder to project_files.

    Only the folder itself is listed (no sub-folders). The module-level
    project_files list is extended in place and nothing is returned.
    """
    xml_paths = (
        os.path.join(project_folder, entry)
        for entry in os.listdir(project_folder)
        if fnmatch.fnmatch(entry, '*.xml')
    )
    project_files.extend(xml_paths)
# Definition: Run Phoenix
def runPhoenix(project_file):
    """Run Phoenix on one project file and wait until it exits.

    A start message and a finish message are logged around the run.

    Args:
        project_file: Path of the .xml project file, passed to the Phoenix
            executable as its only command-line argument.

    Returns:
        The exit code of the Phoenix process. The original discarded it, so
        a failed run could not be told apart from a successful one.
    """
    # NOTE(review): datetime.now() is local time although the message labels
    # it "GMT"; the text is left unchanged so existing log output still matches.
    stamp_format = "%a, %d %b %Y %H:%M:%S GMT"
    log("Running Phoenix @: " + datetime.now().strftime(stamp_format) + " - " + project_file)
    # subprocess.call starts the program and waits for it (Popen + wait).
    return_code = subprocess.call([phoenix_exe_file, project_file])
    log("Phoenix Complete @: " + datetime.now().strftime(stamp_format))
    return return_code
# Definition: main
def main():
    """Collect the project files, then run Phoenix on each of them in turn."""
    log("creating list of XMLs")
    createlist()
    # project_files is the module-level list that createlist() just filled.
    for xml_path in project_files:
        runPhoenix(xml_path)
# Process: Create empty list
project_files = []
# Process: Run main
if __name__ == '__main__':
    log("True")
    # time.clock() was removed in Python 3.8 and measures CPU time on
    # non-Windows platforms; time.time() is wall-clock time everywhere,
    # which is what matters while waiting on an external program.
    time_start = time.time()
    main()
    time_end = time.time()
    log("Time taken in main in seconds(s) is : {}".format(str(time_end - time_start)))
    log("completed")
答案 0 :(得分:0)
---第1部分了解问题---
你的问题看起来有点复杂。让我看看我是否正确理解您的Python程序。你的程序主要做两件事:
查看项目文件夹,找到符合某些条件的xml文件,并创建匹配的xml文件的文件名列表。
使用 runPhoenix 函数(它会调用 Phoenix.exe)处理旧列表中的每个xml文件,也可能是把它们转换成新的“phoenix”文件。
我对html和xml知之甚少,从未听说过xml文件的phoenix程序。
但我认为你的问题一般是试图通过并行执行来加速一系列耗时的工作。
让我举一个你一般问题的具体例子。例如,您有1,000个英文文本文件,并且您希望将英文文本文件翻译成西班牙文。目前,您只有一名翻译员按顺序完成工作,这需要很长时间。
所以你想说,4位翻译员可以并行完成这些工作。
一种可能的解决方案是使用Python的multiprocessing(多处理)包,它可以创建一个由4个翻译工作进程组成的进程池,同时完成这些作业。这样,在理想情况下速度最多可以提高到约4倍。
如果您认为我正确理解您的问题,我可以建议一个粗略的Python多处理程序示例来做一些简单的文本处理供您参考。
---第2部分创建/打印文本文件---
# *** Python 3.6.5 Text file create/print functions - tlfong01 2018apr18hkt1521 ***
# Demo file table: the keys are the decimal strings '0', '1', ...; each value
# names one text file and lists the lines it should contain.
textFileNameLinelistDict = {
    '0': {
        'TextFileName': 'textFile1.txt',
        'Linelist': ['line11\n', 'line12\n'],
    },
    '1': {
        'TextFileName': 'textFile2.txt',
        'Linelist': ['line21\n', 'line22\n'],
    },
}
def createManyTextFiles(textFileNameLinelistDict):
    """Write every text file described in the dictionary.

    Args:
        textFileNameLinelistDict: Mapping keyed by the strings '0', '1', ...
            up to len - 1. Each value supplies 'TextFileName' (the file to
            write) and 'Linelist' (the strings written to it, in order).
    """
    entryCount = len(textFileNameLinelistDict)
    for index in range(entryCount):
        entry = textFileNameLinelistDict[str(index)]
        # Read both fields before the file is opened for writing.
        fileName, lines = entry['TextFileName'], entry['Linelist']
        with open(fileName, mode='w', encoding='utf8') as outFile:
            for text in lines:
                outFile.write(text)
def printManyTextFiles(textFileNameLinelistDict):
    """Print the name and then the whole contents of every listed file.

    Args:
        textFileNameLinelistDict: Mapping keyed by the strings '0', '1', ...
            up to len - 1; each value supplies 'TextFileName'.
    """
    entryCount = len(textFileNameLinelistDict)
    for index in range(entryCount):
        fileName = textFileNameLinelistDict[str(index)]['TextFileName']
        print(fileName)
        with open(fileName, mode='r', encoding='utf8') as inFile:
            contents = inFile.read()
        print(contents)
def createTwoTextFiles():
    """Create the files listed in the module-level textFileNameLinelistDict."""
    createManyTextFiles(textFileNameLinelistDict)


def printTwoTextFiles():
    """Print the files listed in the module-level textFileNameLinelistDict."""
    printManyTextFiles(textFileNameLinelistDict)


# *** Main ***
def main():
    """Create the demo files, then print them back."""
    createTwoTextFiles()
    printTwoTextFiles()


if __name__ == '__main__':
    main()
# *** Sample output ***
textFile1.txt
line11
line12
textFile2.txt
line21
line22
---第3部分翻译文本文件---
现在我编写了一个'翻译'功能,输入一个文本文件并输出一个新的文本文件,所有小写字母都转换为大写字母。
# *** Python 3.6.5 Text file translate - tlfong01 2018apr18hk1940 ***
# Demo file table: the keys are the decimal strings '0', '1', ...; each value
# names an input file, the lines it should contain, and an output file.
textFileDict = {
    '0': {
        'InputTextFileName': 'inputTextFile1.txt',
        'Linelist': ['line11z\n', 'line12z\n'],
        'OutputTextFileName': 'outputTextFile1.txt',
    },
    '1': {
        'InputTextFileName': 'inputTextFile2.txt',
        'Linelist': ['line21z\n', 'line22z\n'],
        'OutputTextFileName': 'outputTextFile2.txt',
    },
}
def createManyTextFiles(textFileDict):
    """Write every input text file described in the dictionary.

    Args:
        textFileDict: Mapping keyed by the strings '0', '1', ... up to
            len - 1. Each value supplies 'InputTextFileName' (the file to
            write) and 'Linelist' (the strings written to it, in order).
    """
    entryCount = len(textFileDict)
    for index in range(entryCount):
        entry = textFileDict[str(index)]
        # Read both fields before the file is opened for writing.
        fileName, lines = entry['InputTextFileName'], entry['Linelist']
        with open(fileName, mode='w', encoding='utf8') as outFile:
            for text in lines:
                outFile.write(text)
def printManyTextFiles(textFileDict, fileNameType):
    """Print the name and then the whole contents of every selected file.

    Args:
        textFileDict: Mapping keyed by the strings '0', '1', ... up to
            len - 1.
        fileNameType: Key inside each entry that holds the file name to
            print, e.g. 'InputTextFileName' or 'OutputTextFileName'.
    """
    entryCount = len(textFileDict)
    for index in range(entryCount):
        fileName = textFileDict[str(index)][fileNameType]
        print(fileName)
        with open(fileName, mode='r', encoding='utf8') as inFile:
            contents = inFile.read()
        print(contents)
def translateManyTextFiles(testFileDict, translateFunction):
    """Translate every input file into its output file, line by line.

    Args:
        testFileDict: Mapping keyed by the strings '0', '1', ... up to
            len - 1; each value supplies 'InputTextFileName' and
            'OutputTextFileName'. The parameter keeps its original spelling
            so that keyword callers continue to work.
        translateFunction: Callable applied to each input line; whatever it
            returns is written to the output file.
    """
    # The original body read the module-level textFileDict and silently
    # ignored this argument; use the dictionary that was actually passed in.
    for textFileNum in range(len(testFileDict)):
        entry = testFileDict[str(textFileNum)]
        inputTextFileName = entry['InputTextFileName']
        outputTextFileName = entry['OutputTextFileName']
        # "with" closes both files even when translateFunction raises.
        with open(inputTextFileName, mode='r', encoding='utf8') as inputTextFile, \
                open(outputTextFileName, mode='w', encoding='utf8') as outputTextFile:
            for line in inputTextFile:
                outputTextFile.write(translateFunction(line))
def shiftUpperCase(string):
    """Return *string* converted to upper case via its upper() method."""
    upperCased = string.upper()
    return upperCased
def createTwoTextFiles():
    """Create the input files listed in the module-level textFileDict."""
    createManyTextFiles(textFileDict)


def printTwoInputTextFiles():
    """Print the input files listed in the module-level textFileDict."""
    printManyTextFiles(textFileDict, 'InputTextFileName')


def translateTwoTextFiles():
    """Upper-case every input file of textFileDict into its output file."""
    translateManyTextFiles(textFileDict, shiftUpperCase)


def printTwoOutputTextFiles():
    """Print the output files listed in the module-level textFileDict."""
    printManyTextFiles(textFileDict, 'OutputTextFileName')


# *** Main ***
def main():
    """Create, print, translate, then print the translated demo files."""
    createTwoTextFiles()
    printTwoInputTextFiles()
    translateTwoTextFiles()
    printTwoOutputTextFiles()


if __name__ == '__main__':
    main()
# *** Sample output ***
'''
inputTextFile1.txt
line11z
line12z
inputTextFile2.txt
line21z
line22z
outputTextFile1.txt
LINE11Z
LINE12Z
outputTextFile2.txt
LINE21Z
LINE22Z
'''
---第4部分使用池进行多处理---
我已经为多处理编写了更多测试。到目前为止一切都很好。
# *** Python 3.6.5 multiprocessing translate - tlfong01 2018apr18hk2153 ***
# *** Text file dictionary ***
# Demo file table: the keys are the decimal strings '0', '1', ...; each value
# names an input file, the lines it should contain, and an output file.
textFileDict = {
    '0': {
        'InputTextFileName': 'inputTextFile1.txt',
        'Linelist': ['line11x\n', 'line12x\n'],
        'OutputTextFileName': 'outputTextFile1.txt',
    },
    '1': {
        'InputTextFileName': 'inputTextFile2.txt',
        'Linelist': ['line21y\n', 'line22y\n'],
        'OutputTextFileName': 'outputTextFile2.txt',
    },
}
# *** Create text file ***
def createManyTextFiles(textFileDict):
    """Write every input text file described in the dictionary.

    Args:
        textFileDict: Mapping keyed by the strings '0', '1', ... up to
            len - 1. Each value supplies 'InputTextFileName' (the file to
            write) and 'Linelist' (the strings written to it, in order).
    """
    entryCount = len(textFileDict)
    for index in range(entryCount):
        entry = textFileDict[str(index)]
        # Read both fields before the file is opened for writing.
        fileName, lines = entry['InputTextFileName'], entry['Linelist']
        with open(fileName, mode='w', encoding='utf8') as outFile:
            for text in lines:
                outFile.write(text)
# *** Print text files ***
def printOneTextFile(textFileName):
    """Print the file name, then the whole contents of that UTF-8 text file.

    Args:
        textFileName: Path of the file to read and print.
    """
    print(textFileName)
    with open(textFileName, mode='r', encoding='utf8') as inFile:
        contents = inFile.read()
    print(contents)
def printManyTextFiles(textFileDict, fileNameType):
    """Print every selected file of the dictionary via printOneTextFile.

    Args:
        textFileDict: Mapping keyed by the strings '0', '1', ... up to
            len - 1.
        fileNameType: Key inside each entry that holds the file name to
            print, e.g. 'InputTextFileName' or 'OutputTextFileName'.
    """
    entryCount = len(textFileDict)
    for index in range(entryCount):
        entry = textFileDict[str(index)]
        printOneTextFile(entry[fileNameType])
# *** Sequential translate text files ***
def shiftUpperCase(string):
    """Return *string* converted to upper case via its upper() method."""
    upperCased = string.upper()
    return upperCased
def translateOneTextFile(inputTextFileName, outputTextFileName, translateFunction):
    """Translate one UTF-8 text file into another, line by line.

    Args:
        inputTextFileName: Path of the file to read.
        outputTextFileName: Path of the file to write (overwritten).
        translateFunction: Callable applied to each input line; whatever it
            returns is written to the output file.
    """
    # "with" closes both files even when translateFunction raises; the
    # original left both handles open in that case.
    with open(inputTextFileName, mode='r', encoding='utf8') as inputTextFile, \
            open(outputTextFileName, mode='w', encoding='utf8') as outputTextFile:
        for line in inputTextFile:
            outputTextFile.write(translateFunction(line))
def sequentialTranslateManyTextFiles(testFileDict, translateFunction):
    """Translate every input file of the dictionary, one after the other.

    Args:
        testFileDict: Mapping keyed by the strings '0', '1', ... up to
            len - 1; each value supplies 'InputTextFileName' and
            'OutputTextFileName'. The parameter keeps its original spelling
            so that keyword callers continue to work.
        translateFunction: Callable handed to translateOneTextFile for each
            file.
    """
    # The original body read the module-level textFileDict and silently
    # ignored this argument; use the dictionary that was actually passed in.
    for textFileNum in range(len(testFileDict)):
        entry = testFileDict[str(textFileNum)]
        inputTextFileName = entry['InputTextFileName']
        outputTextFileName = entry['OutputTextFileName']
        translateOneTextFile(inputTextFileName, outputTextFileName, translateFunction)
def shiftUpperCaseFileNameList(fileNameList):
    """Upper-case one file, given [inputFileName, outputFileName, ...].

    Args:
        fileNameList: Sequence whose first item is the input file name and
            whose second item is the output file name. Only these two items
            are read; any further items are ignored.
    """
    sourceName = fileNameList[0]
    targetName = fileNameList[1]
    translateOneTextFile(sourceName, targetName, shiftUpperCase)
# *** Test functions ***
def createTwoTextFiles():
    """Create the input files listed in the module-level textFileDict."""
    createManyTextFiles(textFileDict)


def printTwoInputTextFiles():
    """Print the input files listed in the module-level textFileDict."""
    printManyTextFiles(textFileDict, 'InputTextFileName')


def sequentialTranslateTwoTextFiles():
    """Upper-case every input file of textFileDict, one after the other."""
    sequentialTranslateManyTextFiles(textFileDict, shiftUpperCase)


def printTwoOutputTextFiles():
    """Print the output files listed in the module-level textFileDict."""
    printManyTextFiles(textFileDict, 'OutputTextFileName')
def sequentialShiftUpperCaseManyTextFiles():
    """Upper-case the two demo input files in turn, then print all four files.

    The output names are spelled 'outputTetFile1.txt' / 'outputTetFile2.txt'
    exactly as in the original listing and its sample output.
    """
    jobs = [
        ['inputTextFile1.txt', 'outputTetFile1.txt', shiftUpperCase],
        ['inputTextFile2.txt', 'outputTetFile2.txt', shiftUpperCase],
    ]
    # Translate both files first, then print, keeping the original order.
    for job in jobs:
        shiftUpperCaseFileNameList(job)
    for job in jobs:
        printOneTextFile(job[0])
        printOneTextFile(job[1])
def parallelShiftUpperCaseManyTextFiles():
    """Upper-case the two demo input files in a worker pool, then print them.

    Each [input, output] pair is handed to shiftUpperCaseFileNameList in a
    pool of 4 worker processes; afterwards all four files are printed.
    """
    # Imported here because no "import multiprocessing as mp" is visible in
    # this listing; without it the name mp is undefined.
    import multiprocessing as mp

    # *** input output file name configuration ***
    inputTextFileName1 = 'inputTextFile1.txt'
    outputTextFileName1 = 'outputTextFile1.txt'
    inputTextFileName2 = 'inputTextFile2.txt'
    outputTextFileName2 = 'outputTextFile3.txt'
    fileNamePairs = [
        [inputTextFileName1, outputTextFileName1],
        [inputTextFileName2, outputTextFileName2],
    ]

    # *** parallel translating the input files ***
    # map() returns only after every pair is processed; leaving the "with"
    # block then shuts the workers down (the original never closed the pool).
    with mp.Pool(4) as pool:
        pool.map(shiftUpperCaseFileNameList, fileNamePairs)

    # *** print input out files ***
    for inputName, outputName in fileNamePairs:
        printOneTextFile(inputName)
        printOneTextFile(outputName)
# *** Main ***
def main():
    """Run the sequential demos first, then the parallel one."""
    # *** Sequential translation using text file dictionary ***
    createTwoTextFiles()
    printTwoInputTextFiles()
    sequentialTranslateTwoTextFiles()
    printTwoOutputTextFiles()

    # *** Sequential shift upper case using text file name lists ***
    sequentialShiftUpperCaseManyTextFiles()

    # *** Parallel shift upper case using text file name lists ***
    parallelShiftUpperCaseManyTextFiles()


if __name__ == '__main__':
    main()
# *** Sample output ***
inputTextFile1.txt
line11x
line12x
inputTextFile2.txt
line21y
line22y
outputTextFile1.txt
LINE11X
LINE12X
outputTextFile2.txt
LINE21Y
LINE22Y
inputTextFile1.txt
line11x
line12x
outputTetFile1.txt
LINE11X
LINE12X
inputTextFile2.txt
line21y
line22y
outputTetFile2.txt
LINE21Y
LINE22Y
inputTextFile1.txt
line11x
line12x
outputTextFile1.txt
LINE11X
LINE12X
inputTextFile2.txt
line21y
line22y
outputTextFile3.txt
LINE21Y
LINE22Y
---第5部分使用Apply Async进行多处理---
上次我使用“Pool”进行多处理。这次我使用的是“Apply Async”方法。我已将ApplyAsync段添加到原始函数中,以便于比较和对比。
enter '''
# *** Python 3.6.5 multiprocessing translate - tlfong01 2018apr20hk1549 ***
def parallelPoolAndApplyAsyncShiftUpperCaseManyTextFiles():
    """Upper-case the two demo files twice: with pool.map, then apply_async.

    After each variant the two input files and the two output files are
    printed so both approaches can be compared.

    Raises:
        Exception: Whatever a worker raised while running an apply_async
            task; it is re-raised here instead of being dropped.
    """
    # Imported here because no "import multiprocessing as mp" is visible in
    # this listing; without it the name mp is undefined.
    import multiprocessing as mp

    # *** input output file name configuration ***
    inputTextFileName1 = 'inputTextFile1.txt'
    inputTextFileName2 = 'inputTextFile2.txt'
    outputTextFileName1 = 'outputTextFile8.txt'
    outputTextFileName2 = 'outputTextFile9.txt'
    fileNamePairs = [
        [inputTextFileName1, outputTextFileName1],
        [inputTextFileName2, outputTextFileName2],
    ]

    # *** Using pool to translate the text files ***
    # The original rebound "pool" further down without ever closing this
    # first pool; the "with" block shuts its workers down.
    with mp.Pool(4) as pool:
        pool.map(shiftUpperCaseFileNameList, fileNamePairs)
    print('\n*** Using pool to translate the text files ***\n')
    for inputName, outputName in fileNamePairs:
        printOneTextFile(inputName)
        printOneTextFile(outputName)

    # *** Using Apply Async translate the text files ***
    with mp.Pool(2) as pool:
        pendingResults = [
            pool.apply_async(shiftUpperCaseFileNameList, (pair,))
            for pair in fileNamePairs
        ]
        pool.close()  # no further tasks will be submitted
        pool.join()   # wait until the workers have finished
    # get() re-raises an exception from the worker; the original discarded
    # the result objects, so a failed task went unnoticed.
    for pending in pendingResults:
        pending.get()

    # *** print input out files ***
    print('\n*** Using Apply Async translate the text files ***\n')
    for inputName, outputName in fileNamePairs:
        printOneTextFile(inputName)
        printOneTextFile(outputName)
# *** Main ***
def test0():
    """Create the demo input files, then run the pool / apply_async demo."""
    createTwoTextFiles()
    # Earlier demos, left switched off:
    #printTwoInputTextFiles()
    #sequentialTranslateTwoTextFiles()
    #printTwoOutputTextFiles()
    #sequentialShiftUpperCaseManyTextFiles()
    parallelPoolAndApplyAsyncShiftUpperCaseManyTextFiles()


def main():
    """Run test0."""
    test0()


if __name__ == '__main__':
    main()
# *** End ***
# *** Sample output ***
'''
'''
>>>
RESTART: D:\work\rpi3b\programs\parallel_python\programs\mtp01_2018apr1905.py
*** Using pool to translate the text files ***
inputTextFile1.txt
line11x
line12x
outputTextFile8.txt
LINE11X
LINE12X
inputTextFile2.txt
line21y
line22y
outputTextFile9.txt
LINE21Y
LINE22Y
*** Using Apply Async translate the text files ***
inputTextFile1.txt
line11x
line12x
outputTextFile8.txt
LINE11X
LINE12X
inputTextFile2.txt
line21y
line22y
outputTextFile9.txt
LINE21Y
LINE22Y
>>>
'''
---第6部分---
/继续,......