考虑代码:
我在代码中读取一个包含6列(第0-5列)的输入文件。
最后,我使用while循环重复整个过程,每次把变量history_ends设置为下一个目标作业的作业号,即int(targetjobs[1][0])。
from __future__ import division
import itertools
# Boundary settings: check() sends rows whose column 0 is below history_ends
# to historyjobs and every other row to targetjobs.
history_begins = 1
history_ends = 5000

# Scalars that no function in the visible code reads.
n = 0
total = 0

# Shared work lists filled by check().
historyjobs = []
targetjobs = []
listsub = []

# Lists that no function in the visible code reads.
listrun = []
listavg = []
F = []

# Scratch lists: runcheck() appends to condrun and newfun() appends to a.
condsub = []
condrun = []
mlistsub = []
a = []
def check(inputfile):
    """Split the rows of *inputfile* into history and target jobs.

    Every whitespace-separated row whose column 0 is below the module-level
    ``history_ends`` is appended to ``historyjobs``; every other row is
    appended to ``targetjobs``.  Afterwards column 1 of each history row
    whose columns 3, 4 and 5 equal those of the first target row is
    appended to ``listsub``.

    Args:
        inputfile: path of the job file to read.

    Blank lines are skipped.  When the file yields no target row there is
    nothing to compare against, so ``listsub`` is left untouched.
    """
    # Iterate the file lazily and let the context manager close it.
    with open(inputfile, 'r') as f:
        for line in f:
            job = line.split()
            if not job:
                continue
            if int(job[0]) < history_ends:
                historyjobs.append(job)
            else:
                targetjobs.append(job)

    if not targetjobs:
        return

    # Convert the key of the first target once instead of once per history row.
    first_target = targetjobs[0]
    target_key = (int(first_target[3]), int(first_target[4]), int(first_target[5]))
    for past in historyjobs:
        if (int(past[3]), int(past[4]), int(past[5])) == target_key:
            listsub.append(past[1])
def runningMean(iterable):
    """Yield the cumulative average of *iterable* after each of its items.

    The n-th value produced is the sum of the first n items divided by n,
    so the first value always equals the first item.  An empty iterable
    produces nothing.
    """
    num = 0
    denom = 0
    for x in iterable:
        num += x
        denom += 1
        yield num / denom
def newfun(results):
    """Reverse *results* in place and collect the first item of every pair.

    Args:
        results: list of ``(value, average)`` pairs; it is reversed in place.

    Returns:
        The module-level list ``a`` after the values have been appended to
        it.  The same list object is returned on every call, so values from
        earlier calls remain until the caller empties it.
    """
    results.reverse()  # put them back in regular order
    for value, _average in results:
        a.append(value)
    return a
def runcheck(subseq, inputfile='newfileinput'):
    """Collect column 2 of every row whose column 1 occurs in *subseq*.

    Args:
        subseq: values convertible with ``int()``; compared against column 1.
        inputfile: path of the job file; defaults to the path that was
            previously hard-coded.

    Returns:
        The module-level list ``condrun`` after the matching column-2
        strings have been appended, in file order.  A row is appended once
        for every occurrence of its column-1 value in *subseq*.

    Blank lines are skipped.
    """
    # Count each wanted value once up front so every row needs a single
    # dictionary lookup instead of a rescan of the whole of subseq.
    wanted = {}
    for value in subseq:
        key = int(value)
        wanted[key] = wanted.get(key, 0) + 1

    with open(inputfile, 'r') as f:
        for line in f:
            job = line.split()
            if not job:
                continue
            hits = wanted.get(int(job[1]), 0)
            if hits:
                condrun.extend([job[2]] * hits)
    return condrun
def listcreate(condrun, condsub, outputfile='outputfile'):
    """Append one summary line for the current target job to *outputfile*.

    The line holds, separated by double tabs: column 2 of the first row of
    the module-level ``targetjobs``, the mean of *condrun* (2 decimals),
    the relative error between the two (3 decimals), the maximum, minimum
    and count of *condrun*, and the absolute difference between the last
    item of *condsub* and column 1 of the first target row.  Four
    ``good``/``bad`` verdicts follow, comparing the relative error against
    0.20, 0.30, 0.40 and 0.50.

    Args:
        condrun: values convertible with ``int()``.
        condsub: values convertible with ``int()``; only the last is used.
            Nothing is written when it is empty.
        outputfile: path opened in append mode; defaults to the path that
            was previously hard-coded.

    Prints ``dem 0`` and writes nothing when a division by zero occurs
    (empty *condrun*, or a target value of 0).

    NOTE(review): the source lost its indentation.  The trailing ``else`` is
    read here as the ``try`` statement's ``else`` (``0`` is printed after a
    successful write), which is how the rewritten listcreate later in this
    file uses it -- confirm against the original script.
    """
    s = [int(value) for value in condrun]
    j = int(targetjobs[0][2])
    targetsub = int(targetjobs[0][1])

    # The context manager closes the file even when an unexpected error occurs.
    with open(outputfile, 'a') as f1:
        if not condsub:
            return
        try:
            convertsub = int(condsub[-1])
            mean = float(sum(s)) / len(s)
            er1 = abs(j - mean) / j
            columns = [
                j,
                round(mean, 2),
                round(er1, 3),
                max(s),
                min(s),
                len(s),
                abs(convertsub - targetsub),
            ]
            verdicts = ['good' if er1 < limit else 'bad'
                        for limit in (0.20, 0.30, 0.40, 0.50)]
            # All values are computed before anything is written, so a
            # division by zero can never leave a partial line in the file.
            f1.write('\t\t'.join(str(column) for column in columns)
                     + '\t\t' + '\t'.join(verdicts) + '\n')
        except ZeroDivisionError:
            print('dem 0')
        else:
            print('0')
def new():
    """Evaluate one target job per pass, moving the history boundary forward.

    Each pass calls ``check('newfileinput')`` to refill the shared lists,
    advances the module-level ``history_ends``, keeps the trailing run of
    ``listsub`` values that exceed 0.9 times their running mean (taken from
    the newest value backwards), and hands that run to ``newfun``,
    ``runcheck`` and ``listcreate``.  The shared lists are emptied before
    the next pass.

    The loop stops when no target row has a higher job number than the
    first target row; as before, that last target is not evaluated.
    """
    global history_ends
    # itertools.izip exists only on Python 2; the built-in zip is lazy on Python 3.
    lazy_zip = getattr(itertools, 'izip', zip)

    while True:
        check('newfileinput')
        if not targetjobs:
            break

        # Advance to the next job number that is strictly greater than the
        # current target's.  Taking targetjobs[1][0] blindly never makes
        # progress when two rows share a job number, and the loop would
        # then rewrite the same line forever.
        current = int(targetjobs[0][0])
        next_ends = next(
            (int(job[0]) for job in itertools.islice(targetjobs, 1, None)
             if int(job[0]) > current),
            None)
        if next_ends is None:
            break
        history_ends = next_ends

        mlistsub = [int(value) for value in listsub]
        results = list(itertools.takewhile(
            lambda pair: pair[0] > 0.9 * pair[1],
            lazy_zip(reversed(mlistsub), runningMean(reversed(mlistsub)))))
        condsub = newfun(results)    # values back in regular order
        condrun = runcheck(condsub)  # matching column-2 values
        listcreate(condrun, condsub)

        # Empty the shared lists in place so the next pass starts clean.
        for work_list in (condrun, condsub, listsub, targetjobs, historyjobs):
            del work_list[:]
def main():
    """Script entry point: run the evaluation loop in new()."""
    new()


if __name__ == '__main__':
    main()
示例输入文件(整个文件包含200,000行):
1 0 9227 1152 34 2
2 111 7622 1120 34 2
3 68486 710 1024 14 2
6 265065 3389 800 22 2
7 393152 48438 64 132 3
8 412251 46744 64 132 3
9 430593 50866 256 95 4
10 430730 10770 256 95 4
11 433750 12701 256 14 3
12 437926 2794 64 34 2
13 440070 43 32 96 3
13 440070 43 32 96 3
14 440102 44 32 96 3
15 440357 43 32 96 3
16 440545 43 32 96 3
17 440599 43 32 96 3
18 440625 43 32 96 3
19 440999 84 32 96 0
20 441574 44 32 96 3
21 442667 7914 512 14 3
22 443249 45 32 96 3
25 443797 3260 128 68 4
26 443799 3746 128 68 4
27 445357 31 8 29 3
28 445393 31 8 29 3
29 445445 28 8 29 3
30 445488 29 8 29 3
示例输出文件内容:
930 1389.14 0.494 3625 977 7 15 bad bad bad good
4348 1331.75 0.694 3625 930 8 164 bad bad bad bad
18047 32237.0 0.786 61465 17285 3 325774 bad bad bad bad
1607 1509.0 0.061 1509 1509 1 6508 good good good good
304 40.06 0.868 80 32 35 53472 bad bad bad bad
7246 7247.0 0.0 7247 7247 1 9691 good good good good
95 1558.0 15.4 1607 1509 2 2148 bad bad bad bad
55 54.33 0.012 56 53 3 448142 good good good good
31 76.38 1.464 392 35 13 237152 bad bad bad bad
207 55.0 0.734 55 55 1 370 bad bad bad bad
如果有人可以建议一些代码运行得更快的更改,那将会很有帮助......
输入文件:
column 0-->represents jobnum
column 1-->represents submittime
column 2-->represents runtime
column 3-->represents userid
column 4-->represents numberof processor
column 5-->represents queueid
我把输入文件的前5000行作为历史记录保留。读取第5001行后,比较它的col3、col4、col5的值是否与这5000行中某些行的值相等。假设大约有20行具有匹配值,则把满足条件的这些行的col2放入一个列表list。计算此列表的运行平均值,并将结果存储到另一个列表list1。然后对所有项目检查条件(list1 > list * 0.9),把满足条件的项目添加到另一个列表list3。对于list3中与历史记录5000行的col0匹配的所有项目,将其col2存储到list4中。现在我需要打开一个文件,写入最终list4的平均值。然后把历史记录向后推进一行,重复相同的操作,直到文件结束(EOF)。
答案 0 :(得分:3)
check
和runcheck
都有此代码读取整个文件,然后遍历所有行。替换以下内容:
lines = f.readlines()
for line in lines:
使用:
for line in f:
只能一次读取和处理这些行。
答案 1 :(得分:1)
我认为以下代码与您的代码相同:
from __future__ import division
from sys import exit
# Row-number boundary: new() sends rows whose column 0 is below history_ends
# to the history list.  history_begins is not read by the visible code.
history_begins, history_ends = 1, 16

# Shared work lists used as the default history / target containers.
historyjobs, targetjobs = [], []
def quickzh(zhlistsub, historyjobs=None):
    """Select the trailing run of history rows that stay close to their mean.

    Walks *zhlistsub* from its last index backwards, keeping a running mean
    of column 1 of the indexed rows.  The last index is always kept; each
    earlier index is kept while column 1 of its row exceeds 0.9 times the
    running mean, and the walk stops at the first row that does not.

    Args:
        zhlistsub: non-empty sequence of indices into *historyjobs*.
        historyjobs: list of numeric rows; defaults to the module-level
            ``historyjobs`` list, looked up when the function is called.

    Returns:
        ``(hfirst, li)``: column 1 of the row at the last index, and
        column 2 of every kept row in the original order.

    Raises:
        ValueError: if *zhlistsub* is empty.
    """
    if historyjobs is None:
        historyjobs = globals()['historyjobs']

    rev = reversed(zhlistsub)
    try:
        i = next(rev)
    except StopIteration:
        # A bare StopIteration escaping a plain function is easy to
        # swallow silently in an enclosing loop or generator.
        raise ValueError('zhlistsub must contain at least one index')

    num = historyjobs[i][1]
    denom = 1
    hfirst = num
    li = [historyjobs[i][2]]
    for i in rev:
        x = historyjobs[i][1]
        num += x
        denom += 1
        if x > 0.9 * (num / denom):
            li.append(historyjobs[i][2])
        else:
            break
    li.reverse()
    return hfirst, li
def listcreate(hfirst, s, historyjobs=None, targetjobs=None,
               outputfile='outputfile.txt'):
    """Append one tab-separated summary line for the first target row.

    The line holds column 2 of ``targetjobs[0]``, the mean of *s*
    (2 decimals), the relative error between the two (3 decimals), the
    maximum, minimum and count of *s*, the absolute difference between
    *hfirst* and column 1 of ``targetjobs[0]``, and four ``good``/``bad``
    verdicts comparing the relative error against 0.2, 0.3, 0.4 and 0.5.

    Args:
        hfirst: number compared against column 1 of the first target row.
        s: sequence of numbers to summarise.
        historyjobs: accepted for compatibility with existing callers; the
            body does not read it.
        targetjobs: list of numeric rows; defaults to the module-level
            ``targetjobs`` list, looked up when the function is called.
        outputfile: path opened in append mode; defaults to the path that
            was previously hard-coded.

    Prints ``dem 0`` and writes nothing when a division by zero occurs
    (empty *s*, or a target value of 0); prints ``0`` after a successful
    write.
    """
    if targetjobs is None:
        targetjobs = globals()['targetjobs']

    # The with-statement closes the file; no explicit close() is needed.
    with open(outputfile, 'a') as f1:
        j = targetjobs[0][2]
        try:
            a, c, d, g = sum(s) / len(s), max(s), min(s), len(s)
            er1 = abs(j - a) / j
            h = abs(hfirst - targetjobs[0][1])
            line = '%s\t%s\t%s\t%s\t%s\t%s\t%s\t' % (
                j, round(a, 2), round(er1, 3), c, d, g, h)
            line += '%s\t%s\t%s\t%s\n' % tuple(
                'good' if er1 < limit else 'bad'
                for limit in (0.2, 0.3, 0.4, 0.5))
            f1.write(line)
        except ZeroDivisionError:
            print('dem 0')
        else:
            print('0')
def new(inputfile, history_ends, historyjobs=None, targetjobs=None):
    """Evaluate one target row per pass, re-reading *inputfile* every pass.

    Each pass converts every row of *inputfile* to a list of ints and puts
    it in *historyjobs* when column 0 is below *history_ends*, otherwise in
    *targetjobs*.  The indices of the history rows whose columns 3-5 equal
    those of the first target row are passed to ``quickzh`` and the result
    to ``listcreate``.  *history_ends* then moves forward and both lists
    are emptied in place.

    Args:
        inputfile: path of the job file.
        history_ends: initial boundary for column 0.
        historyjobs: list that receives history rows; defaults to the
            module-level ``historyjobs``, looked up at call time.
        targetjobs: list that receives target rows; defaults to the
            module-level ``targetjobs``, looked up at call time.

    The loop stops when no target row has a higher column-0 value than the
    first target row; that last target is not evaluated and the lists keep
    the rows of the final pass.  Blank lines are skipped.
    """
    if historyjobs is None:
        historyjobs = globals()['historyjobs']
    if targetjobs is None:
        targetjobs = globals()['targetjobs']

    while True:
        with open(inputfile, 'r') as f:
            for line in f:
                # A real list is needed: map() cannot be indexed on Python 3.
                job = [int(field) for field in line.split()]
                if not job:
                    continue
                (historyjobs if job[0] < history_ends
                 else targetjobs).append(job)

        if not targetjobs:
            break

        # Advance to the next strictly greater job number; taking
        # targetjobs[1][0] blindly never makes progress when two rows share
        # a job number.
        current = targetjobs[0][0]
        next_ends = next(
            (job[0] for job in targetjobs[1:] if job[0] > current), None)
        if next_ends is None:
            break

        target_key = targetjobs[0][3:6]
        zhlistsub = [i for i, element in enumerate(historyjobs)
                     if element[3:6] == target_key]
        if zhlistsub:
            # Pass the lists explicitly so the helpers work on the same data
            # as this function even when the caller supplied its own lists.
            hfirst, li = quickzh(zhlistsub, historyjobs)
            listcreate(hfirst, li, historyjobs, targetjobs)

        history_ends = next_ends
        del targetjobs[:]
        del historyjobs[:]
# Run the evaluation only when executed as a script, so that importing this
# module does not try to open 'toto.txt'.
if __name__ == '__main__':
    new('toto.txt', history_ends)
使用您提供的输入样本和history_ends = 16
,输出文件变为:
43 43.5 0.012 44 43 2 188 good good good good
43 43.33 0.008 44 43 3 54 good good good good
43 43.25 0.006 44 43 4 26 good good good good
44 43.2 0.018 44 43 5 949 good good good good
45 49.14 0.092 84 43 7 1675 good good good good
3746 3260.0 0.13 3260 3260 1 2 good good good good
31 31.0 0.0 31 31 1 36 good good good good
28 31.0 0.107 31 31 2 52 good good good good
如果您需要解释,请问我。其原则是跟踪历史记录中各行的索引,而不是这些行的值。
只是一句话:
在生成器函数runningMean()
中:
for x in iterable:
num += x
denom += 1
yield num / denom
对于可迭代的第一个元素,denom
等于1,然后num/denom
等于num
。
因此,在takewhile(lambda x: x[0] > 0.9 * x[1],
izip(reversed(mlistsub),
runningMean(reversed(mlistsub)))))
中
第一个元素x
始终属于(el,el)
种类,然后x[0] > 0.9 * x[1]
对于第一个元素始终为真,因此results
和condsub
永远不会为空列表。
因此,在listcreate()
函数中,传递的参数condsub
永远不会为空,条件if condsub != []
始终为真。
这就是为什么在我的代码中,这种情况已经消失了。
如果输入文件第一列(即读入的job[0])的值是非严格递增的,则可以将new()
修改为:
def new(inputfile, history_ends, historyjobs=None, targetjobs=None):
    """Read *inputfile* once, then evaluate each group of target rows.

    Rows are converted to lists of ints and put in *historyjobs* when
    column 0 is below *history_ends*, otherwise in *targetjobs*.  For the
    first remaining target row, the indices of the history rows whose
    columns 3-5 match are passed to ``quickzh`` and the result to
    ``listcreate``.  That row and every directly following target row with
    the same column-0 value are then moved to the end of *historyjobs*.

    Args:
        inputfile: path of the job file.
        history_ends: boundary for column 0.
        historyjobs: list that receives history rows; defaults to the
            module-level ``historyjobs``, looked up at call time.
        targetjobs: list that receives target rows; defaults to the
            module-level ``targetjobs``, looked up at call time.

    On return *targetjobs* is empty and *historyjobs* holds every row.
    Blank lines are skipped.
    """
    if historyjobs is None:
        historyjobs = globals()['historyjobs']
    if targetjobs is None:
        targetjobs = globals()['targetjobs']

    with open(inputfile, 'r') as f:
        for line in f:
            # A real list is needed: map() cannot be indexed on Python 3.
            job = [int(field) for field in line.split()]
            if not job:
                continue
            (historyjobs if job[0] < history_ends
             else targetjobs).append(job)

    while targetjobs:
        target_key = targetjobs[0][3:6]
        zhlistsub = [i for i, element in enumerate(historyjobs)
                     if element[3:6] == target_key]
        if zhlistsub:
            # Pass the lists explicitly so the helpers work on the same data
            # as this function even when the caller supplied its own lists.
            hfirst, li = quickzh(zhlistsub, historyjobs)
            listcreate(hfirst, li, historyjobs, targetjobs)

        # Move the whole group sharing this job number into the history.
        # Testing the list before indexing avoids the IndexError that
        # otherwise occurs as soon as the last target has been popped.
        tj00 = targetjobs[0][0]
        while targetjobs and targetjobs[0][0] == tj00:
            historyjobs.append(targetjobs.pop(0))
如果值严格增加,也就是说第1列中没有两行具有相同的值,我认为你可以简化为:
def new(inputfile, history_ends, historyjobs=None, targetjobs=None):
    """Read *inputfile* once, then evaluate the target rows one by one.

    Rows are converted to lists of ints and put in *historyjobs* when
    column 0 is below *history_ends*, otherwise in *targetjobs*.  For each
    target row in turn, the indices of the history rows whose columns 3-5
    match are passed to ``quickzh`` and the result to ``listcreate``; the
    target row is then moved to the end of *historyjobs*.

    Args:
        inputfile: path of the job file.
        history_ends: boundary for column 0.
        historyjobs: list that receives history rows; defaults to the
            module-level ``historyjobs``, looked up at call time.
        targetjobs: list that receives target rows; defaults to the
            module-level ``targetjobs``, looked up at call time.

    On return *targetjobs* is empty and *historyjobs* holds every row.
    Blank lines are skipped.
    """
    if historyjobs is None:
        historyjobs = globals()['historyjobs']
    if targetjobs is None:
        targetjobs = globals()['targetjobs']

    with open(inputfile, 'r') as f:
        for line in f:
            # A real list is needed: map() cannot be indexed on Python 3.
            job = [int(field) for field in line.split()]
            if not job:
                continue
            (historyjobs if job[0] < history_ends
             else targetjobs).append(job)

    # Testing the list first means a file without target rows ends the
    # function quietly instead of raising IndexError.
    while targetjobs:
        target_key = targetjobs[0][3:6]
        zhlistsub = [i for i, element in enumerate(historyjobs)
                     if element[3:6] == target_key]
        if zhlistsub:
            # Pass the lists explicitly so the helpers work on the same data
            # as this function even when the caller supplied its own lists.
            hfirst, li = quickzh(zhlistsub, historyjobs)
            listcreate(hfirst, li, historyjobs, targetjobs)
        historyjobs.append(targetjobs.pop(0))
注意,我仍然不理解k