解析大文本文件,提取数据&将其存储在CSV文件中..太慢了

时间:2015-04-08 07:56:26

标签: python file parsing csv

我有一个大日志文件(比如1-3 Gb)我需要解析,提​​取数据和将其保存在CSV文件中。

文本文件数据

  *    D:40035FC8 wr-long         00000008 \\core0\Global\u4TimeHiCnt         1.000us
  *    D:40027C5C rd-byte               00 *core0\Global\Ypf_OILL_OilLvlOn   20.342us
  *    D:40010044 rd-word             0FE2 *l\u2SAD_OILLVS_RecoveryCounter    0.160us
  *    D:40010044 wr-word             0FE1 *l\u2SAD_OILLVS_RecoveryCounter    0.040us
  *    D:40035FC8 wr-long         00000008 \\core0\Global\u4TimeHiCnt         1.000us

我必须提取在最后一个\之后的变量名,然后提取Read&的数量。写入数据类型&将其存储在CSV文件中。

CSV文件结果

Variable        Datatype     CORE 0      CORE 1      CORE X 
                          Read   Write   Read    Write   Read    Write 

 OS_inKernel         byte   0     0      111768 111878     0    0

 OS_globalIntLevel   long   0     0      281604 237901     0    0

问题是需要花费太多时间。你能不能查看所附的代码和放大器。建议如何加快速度。

import string
import sys
import time

MyFile = open("C:\\Users\\AEC_FULL\\Saravanan\\Workspace\\Trace32Log_Parser\\core1_sram_ReadWrite.txt")#core0_sram_ReadWrite_rawdata

GeneratedFile = open(str(("C:\\Users\\AEC_FULL\\Saravanan\\Workspace\\Trace32Log_Parser\\")+'ParsedOutput.csv'),'w')

try:

    MyVariableList = []
    TimeStartTest       = time.time()        #Starting Time     

    GeneratedFile.write('\nVariable')
    GeneratedFile.write(', Datatype')
    GeneratedFile.write(', CORE 0')
    GeneratedFile.write(',, CORE 1')
    GeneratedFile.write(',, CORE X')

    GeneratedFile.write('\n,, Read ')
    GeneratedFile.write(', Write ')

    GeneratedFile.write(', Read ')
    GeneratedFile.write(', Write ')

    GeneratedFile.write(', Read ')
    GeneratedFile.write(', Write ')                            
    GeneratedFile.write('\n')      



    for CurrentLine in MyFile:

        NoofSpaces = 0

        if CurrentLine.find('\\') != -1:
            MyVariable     = CurrentLine[CurrentLine.rfind('\\')+1:].split(' ')[0]             
        elif CurrentLine.find('*\\') != -1:
            MyVariable     = CurrentLine[CurrentLine.rfind('*\\')+1:].split(' ')[0]             
        elif CurrentLine.find('*') != -1:                
            MyVariable     = CurrentLine[CurrentLine.rfind('*')+1:].split(' ')[0]    

        VariableFound = 0            
        MyVariableList.sort()

        Lowerbound = 0
        Upperbound = len(MyVariableList)-1

        while Lowerbound <= Upperbound and VariableFound == 0:
            middle_pos = (Lowerbound+Upperbound) // 2
            if MyVariableList[middle_pos] < MyVariable:
                Lowerbound = middle_pos + 1
            elif MyVariableList[middle_pos] > MyVariable:
                Upperbound = middle_pos - 1
            else:
                VariableFound = 1        

        if VariableFound == 0:            
            MyVariableList.append(MyVariable) 
            try:
                MyFile1 = open("C:\\Users\\AEC_FULL\\Saravanan\\Workspace\\Trace32Log_Parser\\core1_sram_ReadWrite.txt")#core0_sram_ReadWrite_rawdata

                Core0_ReadCount  = 0
                Core0_WriteCount = 0
                Core1_ReadCount  = 0
                Core1_WriteCount = 0
                CoreX_ReadCount  = 0
                CoreX_WriteCount = 0                    
                for CurrentLine1 in MyFile1:

                    if CurrentLine1.find(MyVariable) != -1:
                        ##   CORE 0  ##
                        if CurrentLine1.find("0\\Global") != -1:
                            DataType        = CurrentLine1.split('         ')[0].split('-')[1]
                            DataOperation   = CurrentLine1.split('         ')[0].split('-')[0].split(' ')[-1]
                            if DataOperation == 'rd':
                                Core0_ReadCount = Core0_ReadCount + 1
                            elif DataOperation == 'wr':
                                Core0_WriteCount = Core0_WriteCount + 1                                              
                        ##   CORE 1  ##                        
                        elif CurrentLine1.find("1\\Global") != -1:                                
                            DataType        = CurrentLine1.split('         ')[0].split('-')[1]
                            DataOperation   = CurrentLine1.split('         ')[0].split('-')[0].split(' ')[-1]
                            if DataOperation == 'rd':
                                Core1_ReadCount = Core1_ReadCount + 1
                            elif DataOperation == 'wr':
                                Core1_WriteCount = Core1_WriteCount + 1                            
                        ##   CORE X  ##                        
                        else:
                            DataType        = CurrentLine1.split('         ')[0].split('-')[1]
                            DataOperation   = CurrentLine1.split('         ')[0].split('-')[0].split(' ')[-1]
                            if DataOperation == 'rd':
                                CoreX_ReadCount =  CoreX_ReadCount + 1
                            elif DataOperation == 'wr':
                                CoreX_WriteCount = CoreX_WriteCount + 1

                GeneratedFile.write('\n %s' %MyVariable)
                GeneratedFile.write(', %s' %DataType)                            
                GeneratedFile.write(', %d' %Core0_ReadCount)
                GeneratedFile.write(', %d' %Core0_WriteCount)             
                GeneratedFile.write(', %d' %Core1_ReadCount)
                GeneratedFile.write(', %d' %Core1_WriteCount)  
                GeneratedFile.write(', %d' %CoreX_ReadCount)
                GeneratedFile.write(', %d' %CoreX_WriteCount)                            
                GeneratedFile.write('\n')                                                    


            finally:
                MyFile1.close()            

except:

    print sys.exc_info()       

finally:

    GeneratedFile.close()  
    MyFile.close()
    TimeStopTest       = time.time()
    print str(int((TimeStopTest - TimeStartTest)/60))

1 个答案:

答案 0 :(得分:0)

您最好使用with语句,如下所示:

# if this file is line based
with open('test.txt') as f:
    for line in f:
        # process line, do something with line