我有一段代码,它使用一个线程定期从 MySQL 数据库读取数据,并用这些数据创建 pandas 数据框(DataFrame)。另一个线程则以更短的时间间隔定期使用这些数据(在此示例中只是简单地打印)。每次读取到新数据后,旧数据就不再需要了。问题在于,每次第一个线程(获取数据的线程)运行时,这段代码的内存占用都会不断累积。如何避免这种情况?
我尝试过手动调用垃圾回收器(gc.collect()),但没有效果。
## import libraries
from time import sleep
import mysql.connector as sql
import pandas as pd
import threading
import numpy as np
import gc
## define function to get data from mysql database and make pandas series
def get_HLC(interval='60s', bars=230):
    """Load all ticks from MySQL and return the latest high/low/close bars.

    The whole ``ticks`` table is read, indexed by its ``timestamp`` column
    (interpreted as integer milliseconds), and ``last_price`` is resampled
    into OHLC bars of width ``interval``.  For bars without a price, the
    close is forward-filled from the previous bar and the high/low are set
    to that filled close.

    Args:
        interval: pandas offset alias giving the bar width.
        bars: positive number of most recent bars to return.

    Returns:
        A ``(high, low, close)`` tuple of pandas Series, each holding at most
        ``bars`` rows and indexed by the resampled timestamps.
    """
    db = sql.connect(host='localhost', user='root', password='', database='algo')
    try:
        # The timestamp column is converted explicitly below, so no
        # ``parse_dates`` argument is passed here.
        ticks = pd.read_sql('select *from ticks', con=db)
    finally:
        # Close the connection even if the query raises, so a failed refresh
        # does not leave a connection open.
        db.close()

    ticks = ticks.set_index(['timestamp'])
    ticks.index = pd.to_datetime(ticks.index.astype(np.int64), unit='ms')

    # ``resample(rule, how=...)`` is not accepted by current pandas; aggregate
    # on the resampler object instead.
    ohlc = ticks['last_price'].resample(interval).ohlc()

    # Bars without a price: carry the previous close forward, then use that
    # close for the missing high and low.
    close = ohlc['close'].ffill()
    high = ohlc['high'].fillna(close)
    low = ohlc['low'].fillna(close)

    # Copy the tail slices so the returned Series hold only ``bars`` rows
    # instead of possibly referencing the full-length resampled data.
    # Locals are released when the function returns; no explicit ``del`` or
    # ``gc.collect()`` is needed.
    return (
        high.iloc[-bars:].copy(),
        low.iloc[-bars:].copy(),
        close.iloc[-bars:].copy(),
    )
## define function that gets the data in a loop
def thread1_Function():
    """Refresh the module-level ``low`` series every 30 seconds, forever.

    Each iteration calls ``get_HLC()`` and keeps only the second element of
    its result (the low series), rebinding the global ``low`` so the
    previously stored series is no longer referenced from here.
    """
    global low
    while True:
        # Index the result directly: the tuple and its other two series are
        # not bound to any name, so nothing but ``low`` is kept during the sleep.
        low = get_HLC()[1]
        sleep(30)
## Start the background thread that runs the data-refresh loop (thread1_Function).
thread1 = threading.Thread(target=thread1_Function)
thread1.start()
## Pause the main script for 5 seconds -- presumably to give thread1 time to
## finish its first fetch before the data is used (TODO confirm intent).
sleep(5)
## define function that loops and does things with data of thread1
def thread2_Function():
    """Print the current module-level ``low`` value every 15 seconds, forever.

    If ``low`` has not been assigned yet, the iteration prints nothing and
    simply waits for the next cycle.
    """
    while True:
        # ``low`` is created by thread1_Function only after its first
        # get_HLC() call returns; look it up defensively so a slow first
        # fetch does not raise NameError and end this thread.
        latest = globals().get('low')
        if latest is not None:
            # do things with thread1 data
            print(latest)
        sleep(15)
## Start the thread that runs the consumer loop (thread2_Function).
thread2 = threading.Thread(target=thread2_Function)
thread2.start()