我写了一段简短的 Python 代码来收集一些运行时统计信息,帮助我们评估性能。它从一个 CSV 文件中读取查询语句,逐条运行并记录执行时间。除了执行时间之外,我还想捕获每条查询的查询剖析信息(query profile),然后将其写入日志或表中。这可以做到吗?
import pandas as pd
import pymysql
import time
import numpy as np
# CSV file that supplies the queries to benchmark.
csv_location = r'/Users/xxx/Downloads/test_data.csv'
# Single shared connection used for every timed query; the "xxxx" values
# are placeholders to be replaced with real settings.
connec = pymysql.connect(
    host="xxxx",
    user="xxxx",
    password="xxxx",
    database="xxxx",
)
def run_query(q, conn):
    """Run one SQL query and return how long it took, in seconds.

    Args:
        q: SQL text to execute.
        conn: Open database connection passed to ``pandas.read_sql``.

    Returns:
        Elapsed time in seconds as a float. The fetched rows are discarded;
        only the duration of the ``read_sql`` call is reported.
    """
    print('running query: ', q)
    # perf_counter is monotonic and high-resolution, so a system clock
    # adjustment during the query cannot skew (or negate) the measurement
    # the way time.time() can.
    start_time = time.perf_counter()
    pd.read_sql(q, conn)
    return time.perf_counter() - start_time
# Capture the queries to run; the single CSV column is labelled 'query'.
df_queries = pd.read_csv(csv_location, names=['query'])

# Time every query five times over the shared connection (a separate
# connection per pass could be substituted here if required).
time_columns = ["time1", "time2", "time3", "time4", "time5"]
try:
    for column in time_columns:
        # A plain sequential loop runs each query exactly once per pass.
        # np.vectorize (without otypes) makes an extra probing call on the
        # first element, which would execute the first query twice, and it
        # raises on an empty query list.
        df_queries[column] = np.fromiter(
            (run_query(q, connec) for q in df_queries["query"]),
            dtype=float,
        )
finally:
    # Release the connection even if one of the queries fails.
    connec.close()

# stats
# Compute every statistic from the raw timings only, so that the text
# 'query' column and the statistics added just above do not leak into the
# later calculations.
timings = df_queries[time_columns]
df_queries['median'] = timings.median(axis=1)
df_queries['percentile_90th'] = timings.quantile(q=0.9, axis=1)
df_queries['stdev'] = timings.std(axis=1)
df_queries['max'] = timings.max(axis=1)

# dump results to csv
df_queries.to_csv('test.csv')