简化大数据处理脚本

时间:2017-07-04 15:20:36

标签: python-3.x large-data

我正在尝试执行以下操作，但需要花费很多时间。有人可以建议更快的方法吗？

# Writes one CSV line per input row: the row's index value followed by the
# (Vm, Va) pairs computed from that row's P and Q values.
#
# 'n' is around 8000000
# 'PQ_data' is a pandas DataFrame with more than n rows
# 'class_' is a python class object with some functions in it
#   (the placeholder was originally spelled `class`, which is a reserved
#   keyword and cannot be used as an identifier; the trailing underscore
#   follows the PEP 8 convention for keyword clashes)

# Fetch every needed column from the DataFrame once, as a plain array,
# instead of repeating the column lookup for each of the n rows.
# NOTE(review): the arrays are indexed by position; this matches the
# original `PQ_data[col][i]` as long as the index is not integer-labelled
# (PQ_data.index has datetime values).
p_columns = [PQ_data['{} P'.format(j)].values for j in range(1, 11)]
q_columns = [PQ_data['{} Q'.format(j)].values for j in range(1, 11)]
timestamps = PQ_data.index

# Entries 11..13 have no P/Q column and are always passed as 0.
padding = [0] * 3

# 'Datetime,0: Vm,0: Va,...,13: Vm,13: Va\n'
header = 'Datetime,' + ','.join(
    '{0}: Vm,{0}: Va'.format(k) for k in range(14)
) + '\n'

# `with` guarantees the file is closed even if a row fails part-way.
with open('answer.csv', 'w') as f:
    f.write(header)
    for i in range(n):
        p = [column[i] for column in p_columns] + padding
        q = [column[i] for column in q_columns] + padding

        class_.do_something(p, q)
        vm = class_.get_Vm().tolist()
        va = class_.get_Va().tolist()
        # above methods return 14 length lists.

        # Build the whole line and write it once rather than issuing one
        # small write per field.
        fields = ['{}'.format(timestamps[i])]
        fields.extend('{},{}'.format(m, a) for m, a in zip(vm, va))
        f.write(','.join(fields) + '\n')

1 个答案:

答案 0 :(得分:0)

试试这个。如果仍然不够快，您可能需要对它使用多进程（multiprocessing）。

import csv
import itertools

# Writes one CSV row per input row: the row's index value followed by the
# interleaved (Vm, Va) values computed from that row's P and Q values.
#
# `n` and `PQ_data` are the row count and pandas DataFrame from the question.
# `class_` stands for the question's worker object; the question spelled it
# `class`, which is a reserved keyword and cannot be used as an identifier.

# Fetch every needed column once, as a plain array, so the per-row work is
# simple positional indexing rather than a DataFrame column lookup.
# NOTE(review): positional indexing matches the original `PQ_data[col][i]`
# as long as the index is not integer-labelled (the question states the
# index holds datetime values).
p_columns = [PQ_data['{} P'.format(j)].values for j in range(1, 11)]
q_columns = [PQ_data['{} Q'.format(j)].values for j in range(1, 11)]
timestamps = PQ_data.index

# Entries 11..13 have no P/Q column and are always passed as 0.
padding = [0] * 3

header = ['Datetime']
for k in range(14):
    header.extend(['{}: Vm'.format(k), '{}: Va'.format(k)])

# newline='' is required for files handed to csv.writer; without it the
# writer's own '\r\n' is translated again on Windows and every data row is
# followed by a blank line.
with open('answer.csv', 'w', newline='') as fout:
    outfile = csv.writer(fout)
    outfile.writerow(header)

    for i in range(n):
        p = [column[i] for column in p_columns] + padding
        q = [column[i] for column in q_columns] + padding

        class_.do_something(p, q)
        vm = class_.get_Vm().tolist()
        va = class_.get_Va().tolist()

        # Interleave as vm[0], va[0], vm[1], va[1], ...
        row = [timestamps[i]]
        row.extend(itertools.chain.from_iterable(zip(vm, va)))
        outfile.writerow(row)