Python内存使用

时间:2014-03-05 04:36:55

标签: python memory-management

我有一段代码,它读取一组文件,把它们拼接在一起并绘制成图。下面贴出了代码的主要部分,并尽量让它更易读;如果需要,我可以补充更多代码。

# Walk the data tree. For every "<a>-<b>-0.dat" series found in a directory,
# stitch up to 45 consecutive files together, keep the longest uninterrupted
# run, filter it and save three figures (spectrogram, FFT, raw signal).
for paths,dirs,files in os.walk(start_path):
    for d in dirs:
        # NOTE(review): the directory is always resolved against start_path,
        # not against the directory os.walk is currently visiting. That is
        # only correct for a tree one level deep -- confirm the layout.
        path = start_path +  changedir + d
        pathpart = d

        os.chdir(path)
        for first_file in glob.glob("*-0.dat"):
            tempname = first_file.split("-")
            fileName1 = str(tempname[0] + "-" + tempname[1]+ "-")
            gc.collect()

            # temp_* accumulate the current uninterrupted run of good files;
            # a corrupted file closes the run and starts a new one.
            temp_1 = []
            temp_2 = []
            temp_3 = []
            Data_Sets1 = []
            Data_Sets2 = []
            Headers = []

            for fileNumber in range(0,45):
                fileName = fileName1 + str(fileNumber) + fileName3
                header, data1, data2 = u.unpackFile(path,fileName)

                if header is None:
                    logging.warning("curropted file found at " + fileName)
                    Data_Sets1.append(temp_1)
                    Data_Sets2.append(temp_2)
                    Headers.append(temp_3)
                    temp_1 = []
                    temp_2 = []
                    temp_3 = []
                else:
                    logging.info(fileName + " is good!")
                    temp_3.append(header)
                    # One bulk extend instead of 10000 appends per channel.
                    temp_1.extend(data1[:10000])
                    temp_2.extend(data2[:10000])

            Data_Sets1.append(temp_1)
            Data_Sets2.append(temp_2)
            Headers.append(temp_3)
            del temp_1, temp_2, temp_3

            # Pick the longest run (the first one on ties).
            lengths = [len(data_set) for data_set in Data_Sets1]
            index = lengths.index(max(lengths))

            Chan1 = Data_Sets1[index]
            Chan2 = Data_Sets2[index]
            Start_Header = Headers[index]
            # Drop the shorter runs before the memory-hungry FFT/plot stage.
            del Data_Sets1, Data_Sets2, Headers

            # The original `len(Chan1) == 0 | len(Chan2) == 0` parsed as a
            # chained comparison against `0 | len(Chan2)` and therefore only
            # skipped when BOTH channels were empty.
            if not Chan1 or not Chan2:
                continue
            try:
                # Start_Header is already the header list of the chosen run,
                # so its first entry is the header of the run's first file.
                # (Indexing it again with `index` discarded valid runs.)
                Date = Start_Header[0][0]
                Time = Start_Header[0][1]
            except IndexError:
                logging.critical("file " + fileName + " is unusuable")
                continue

            # (Disabled in the original: truncating both channels to the
            # largest power-of-two length before processing.)
            logging.debug("Length of channels is " + str(len(Chan1)))

            window = np.hanning(Window_Width)
            t= s.Time_Array(len(Chan1),Sample_Rate)
            window2 = np.hanning(len(Chan1))

            # Multiples of 60 Hz; 180.0 is appended a second time exactly as
            # in the original so s.Noise_Reduction sees the same input.
            Noise_Frequincies = [60.0*float(i) for i in range(1,125)]
            Noise_Frequincies.append(180.0)

            filter1 = s.Noise_Reduction(Sample_Rate,Noise_Frequincies,Chan1)
            filter2 = s.Noise_Reduction(Sample_Rate,Noise_Frequincies,Chan2)
            del Chan1, Chan2

            logging.info("Starting the plots")
            stamp = str(Date) + "-" + str(Time)

            # ---- Figure 1: spectrograms --------------------------------
            fig1, (ax1, ax2) = plt.subplots(nrows=2)

            spec1, freqs1, time1 = mlab.specgram(filter1, NFFT=Window_Width, Fs=Sample_Rate, window=window, noverlap=Over_Lap)
            im1 = ax1.imshow(spec1, cmap=cm.get_cmap("rainbow"), norm=colors.LogNorm(), origin='lower',
                extent=[t[0], t[-1], freqs1.min(), 8000],aspect='auto',vmin=1e-5,vmax=1e5)
            ax1.set_title(stamp + " Channel 1")
            ax1.set_ylabel("Freqency Hz")

            spec2, freqs2, time2 = mlab.specgram(filter2, NFFT=Window_Width, Fs=Sample_Rate, window=window, noverlap=Over_Lap)
            im2 = ax2.imshow(spec2, cmap=cm.get_cmap("rainbow"), norm=colors.LogNorm(), origin='lower',
                extent=[t[0], t[-1], freqs2.min(), 8000],aspect='auto',vmin=1e-5,vmax=1e5)

            cax1, kw1 = matplotlib.colorbar.make_axes(ax1)
            colorbar(im1,cax=cax1,**kw1)
            cax2, kw2 = matplotlib.colorbar.make_axes(ax2)
            colorbar(im2,cax=cax2,**kw2)

            ax2.set_title(stamp + " Channel 2")
            ax2.set_ylabel("Freqency Hz")

            save1 = save_path+pathpart + changedir+specgram_path
            if not os.path.exists(save1):
                os.makedirs(save1)
            fig1.savefig(os.path.join(save1,stamp + "-Power_Spec1.png"))
            logging.info("Spectrogram path is " + save1)

            # Close each figure as soon as it is written so that at most one
            # figure (and its image data) is alive at a time.
            fig1.clf()
            plt.close(fig1)
            del spec1, spec2, im1, im2, freqs1, freqs2, time1, time2

            # ---- Figure 2: FFT magnitude -------------------------------
            fig2, (ax4,ax6) = plt.subplots(nrows=2)

            # Keep the magnitudes as NumPy arrays rather than building
            # Python lists one element at a time.
            final_fft = np.absolute(s.Full_FFT(filter1,window2))
            n_bins = len(final_fft)
            freqs = np.arange(n_bins) * Sample_Rate / float(n_bins)
            ax4.plot(freqs, final_fft)

            new_fft = np.absolute(s.Full_FFT(filter2,window2))
            ax6.plot(freqs,new_fft)

            ax4.set_title(stamp +" Channel 1")
            ax4.set_xlabel("Bins")
            ax4.set_ylabel("Power")

            ax6.set_title(stamp + " Channnel 2")
            ax6.set_xlabel("Bins")
            ax6.set_ylabel("Power")

            save2 = save_path+pathpart+ changedir + freq_path
            logging.info("Frequency path is " + save2)
            if not os.path.exists(save2):
                os.makedirs(save2)
            # Saved only after the titles/labels are set; the original saved
            # first, so they never appeared in the PNG.
            fig2.savefig(os.path.join(save2,stamp + "-Freq.png"))

            fig2.clf()
            plt.close(fig2)
            del final_fft, new_fft, freqs

            # ---- Figure 3: filtered time-domain signal -----------------
            fig3, (ax7, ax9) = plt.subplots(nrows=2)
            ax7.plot(t,filter1)
            ax9.plot(t,filter2)

            save3 = save_path+pathpart + changedir +signal_path
            if not os.path.exists(save3):
                os.makedirs(save3)
            fig3.savefig(os.path.join(save3,stamp + "-Signal.png"))
            logging.info("Signal path is " + save3)

            fig3.clf()
            plt.close(fig3)
            del filter1, filter2, t, window, window2

            # Safety net: make sure no figure survives into the next series.
            close('all')
            gc.collect()

这是解包代码

def unpackFile(path,fileName):
    """Read one data file and split its samples into two channels.

    The file is read as: a header that ends with "}", a 10-byte start key
    that must equal "Data_Start", up to 40000 bytes of big-endian signed
    16-bit samples, and whatever follows as the end key (only logged).
    Samples alternate between the channels, channel 2 first.

    Args:
        path: Directory that contains the file.
        fileName: Name of the file inside ``path``.

    Returns:
        ``(header_parts, chan1, chan2)``: the header text without its first
        and last character, split on commas, plus two lists of 10000 ints.
        ``(None, None, None)`` is returned when the file is missing, cannot
        be opened or read, is smaller than 1000 bytes, has no start key, or
        does not provide exactly 10000 samples per channel.
    """
    fullPath = os.path.join(path, fileName)
    logging.info("Starting file " + fileName)
    if not os.path.isfile(fullPath):
        logging.warning("could not find "+fileName)
        return None, None, None
    try:
        contents = open(fullPath,'rb')
    except IOError:
        logging.warning(fileName + " Not found")
        return None, None, None

    # The with-block closes the file on every exit path, including errors.
    with contents:
        if os.path.getsize(fullPath) < 1000:
            logging.warning(fileName + " is below 1000 bytes")
            return None, None, None

        # Collect the header up to and including its closing bracket.
        headerBytes = bytearray()
        startKey = b""
        while True:
            char = contents.read(1)
            if not char:
                # End of file without a "}". The original kept reading empty
                # strings forever here; fall through to the start-key check,
                # which reports the file as corrupted.
                break
            headerBytes += char
            if char == b"}":
                # The 10 bytes after the bracket should be the start key.
                startKey = contents.read(10)
                break

        if startKey == b"Data_Start":
            logging.info("Found start key for file "+fileName)
        else:
            logging.warning("No start key found " + fileName + " is corrupted")
            return None, None, None

        try:
            logging.debug("Reading the data")
            data = contents.read(40000)
            endKey = contents.read()
        except IOError:
            logging.warning("IOE error trying to read the end key")
            return None, None, None

    if endKey == b"Data_Stop ":
        logging.debug("Found end key " )
    else:
        logging.debug("No end key found in" +fileName)

    # Unpack every complete 2-byte sample in one call; a trailing odd byte
    # is ignored, as before.
    count = len(data) // 2
    dataList = struct.unpack('>%dh' % count, data[:2 * count])
    logging.debug("total points found is " + str(len(dataList)))

    # Even positions belong to channel 2, odd positions to channel 1.
    chan2 = list(dataList[0::2])
    chan1 = list(dataList[1::2])

    # Both channels must hold exactly 10000 points, otherwise the file is
    # treated as corrupted.
    if len(chan2)!=10000:
        logging.warning("Chanel 2 did not containg the right number of data points, " + fileName + " is corupted")
        return None, None, None
    if len(chan1)!=10000:
        logging.warning("Chanel 1 did not containg the right number of data points, " + fileName + " is corupted")
        return None, None, None

    header = bytes(headerBytes)
    if not isinstance(header, str):
        # Python 3: turn the raw bytes into text. On Python 2 bytes is str
        # already, so the return type there is unchanged.
        header = header.decode('latin-1')
    # Strip the first character and the closing bracket.
    header = header[1:-1]
    return header.split(','),chan1,chan2

代码中某处存在内存泄漏,但我找不到具体位置。我想让代码遍历目录、挑选出数据集,然后绘制它们。运行几分钟后,它就会占用几 GB 的内存。有什么办法可以减少内存占用吗?

1 个答案:

答案 0 :(得分:1)

在您这样的情况下,我更倾向于把清理工作交给操作系统。因此,我会把泄漏内存的代码放在一个单独的进程中运行:进程退出后,它占用的内存会全部归还给操作系统。

from multiprocessing import Process, Queue

def memory_leaking_code(arg1, q):
   """Placeholder for the leaking workload; meant to run in a child process.

   Args:
      arg1: Input for the workload; this placeholder only prints it.
      q: Queue-like object whose ``put`` hands the result back to the caller.
   """
   # Parenthesised print works on Python 2 and 3 alike for a single argument.
   print(arg1)
   q.put('data from memory_leaking_code()')

def main():
   """Run memory_leaking_code in a child process and print its result."""
   q = Queue()
   p = Process(target=memory_leaking_code, args=('data to memory_leaking_code()', q))
   p.start()
   # Fetch the result before joining so the child is never left blocked on
   # a queue that still holds data.
   print(q.get())
   if p.is_alive():
      p.terminate()
   # Reap the child; without join() a finished process lingers as a zombie.
   p.join()


# Guard the entry point: when multiprocessing uses the "spawn" start method
# the child re-imports this module, and an unguarded call would start a new
# child from every child.
if __name__ == "__main__":
   main()