我编写了一个连接到Oracle数据库并将结果存入日志文件的脚本。我想得到这样的输出:
FEC_INCLUSION = 2005-08-31 11:43:48,DEBITO_PENDIENTE =无,CAN_CUOTAS = 1.75e-05,COD_CUENTA = 67084,INT_TOTAL =无,CAN_CUOTAS_ANTERIOR =无,COD_INVERSION = 1,FEC_MODIFICACION = 10/04/2012 09:45:22,SAL_TOT_ANTERIOR =无,CUOTA_COMISION =无,FEC_ULT_CALCULO =无,MODIFICADO_POR = CTAPELA,SAL_TOTAL = 0.15,COD_TIPSALDO = 1,MONTO_COMISION =无,COD_EMPRESA = 1,SAL_INFORMATIVO =无,COD_OBJETIVO = 5,SAL_RESERVA =无, INCLUIDO_POR = PVOROPE,APORTE_PROM = 0.0,COSTO_PROM =无,CREDITO_PENDIENTE =无,SAL_PROM = 0.0,
FEC_INCLUSION = 2005-08-31 11:43:49,DEBITO_PENDIENTE =无,CAN_CUOTAS = 0.0,COD_CUENTA = 67086,INT_TOTAL =无,CAN_CUOTAS_ANTERIOR =无,COD_INVERSION = 9,FEC_MODIFICACION = 25/02/2011 04:38 :52,SAL_TOT_ANTERIOR =无,CUOTA_COMISION =无,FEC_ULT_CALCULO =无,MODIFICADO_POR = OPEJAMO,SAL_TOTAL = 0.0,COD_TIPSALDO = 1,MONTO_COMISION =无,COD_EMPRESA = 1,SAL_INFORMATIVO =无,COD_OBJETIVO = 5,SAL_RESERVA =无,INCLUIDO_POR = PVOROPE ,APORTE_PROM = 0.0,COSTO_PROM =无,CREDITO_PENDIENTE =无,SAL_PROM = 0.0,
我用查询结果创建了一个字典:
def DictFactory(description,data):
column_names = [col[0] for col in description]
results = []
for row in data:
results.append(dict(zip(column_names,row)))
return results
然后我创建了这个函数,最后将结果保存到我的日志中:
def WriteLog(log_file,header,data):
file_exist = os.path.isfile(log_file)
log = open(log_file,'a')
if not file_exist:
print "File does not exist, writing new log file"
open(log_file,'w').close()
mydata = DictFactory(header,data)
checkpoint_name = ReadCheckpointName()
string = ''
for m in mydata:
for k,v in m.items():
string = string + k + ' = ' + str(v) + ','
if k == checkpoint_name:
#print "KEY FOUND"
cur_checkpoint = v
cur_checkpoint = str(cur_checkpoint)
#print string
string = string + '\n'
print cur_checkpoint
log.write(string + '\n')
WriteCheckpoint(cur_checkpoint,checkpoint_file)
log.close()
这是主要功能:
def GetInfo():
mypool = PoolToDB()
con = mypool.acquire()
cursor = con.cursor()
GetLastCheckpoint()
sql = ReadQuery()
#print sql
cursor.execute(sql)
data = cursor.fetchall()
WriteLog(log_file,cursor.description,data)
#WriteCsvLog(log_file,cursor.description,data)
cursor.close()
但我意识到,如果我使用一个获取少量记录的查询,它就有效,但是如果我尝试获取许多记录,我的脚本永远不会结束。
当我执行包含5000条记录的查询时,这是我的输出。你可以看到它需要太长时间。
jballesteros@SplunkPorvenir FO_TIPSALDOS_X_CUENTA]$ python db_execution.py
Starting connection: 5636
GetLastCheckpoint function took 0.073 ms
GetLastCheckpoint function took 0.025 ms
ReadQuery function took 0.084 ms
File does not exist, writing new log file
DictFactory function took 23.050 ms
ReadCheckpointName function took 0.079 ms
WriteCheckpoint function took 0.204 ms
WriteLog function took 45112.133 ms
GetInfo function took 46193.033 ms
我很确定你知道一个更好的方法来做我想做的事情。
这是完整的代码:
#!/usr/bin/env python
# encoding: utf-8
import re
import sys
try:
import cx_Oracle
except:
print "Error: Oracle module required to run this plugin."
sys.exit(0)
import datetime
import re
import commands
import os
from optparse import OptionParser
import csv
import time
#################################
#### Database Variables ####
#################################
Config = {
"host" : "",
"user" : "",
"password" : "",
"instance" : "",
"port" : "",
}
Query = {
"sql" : "",
"checkpoint_datetype" : "",
"checkpoint_name" : "",
}
dir = '/home/jballesteros/PENS2000/FO_TIPSALDOS_X_CUENTA/'
connection_dir = '/home/jballesteros/PENS2000/Connection'
checkpoint_file = dir + 'checkpoint.conf'
log_file = '/var/log/Pens2000/FO_TIPSALDOS_X_CUENTA.csv'
internal_log = '/var/log/Pens2000/internal.log'
query = dir + 'query'
sys.path.append(os.path.abspath(connection_dir))
from db_connect_pool import *
def Timing(f):
def wrap(*args):
time1 = time.time()
ret = f(*args)
time2 = time.time()
print "%s function took %0.3f ms" % (f.func_name,(time2- time1)*1000.0)
return ret
return wrap
@Timing
def InternalLogWriter(message):
now = datetime.datetime.now()
log = open(internal_log, 'a')
log.write("%s ==> %s" % (now.strftime("%Y-%m-%d %H:%M:%S"),message))
log.close()
return
@Timing
def GetLastCheckpoint():
global cur_checkpoint
conf = open(checkpoint_file, 'r')
cur_checkpoint = conf.readline()
cur_checkpoint = cur_checkpoint.rstrip('\n')
cur_checkpoint = cur_checkpoint.rstrip('\r')
conf.close()
@Timing
def ReadQuery():
global cur_checkpoint
GetLastCheckpoint()
qr = open(query, 'r')
line = qr.readline()
line = line.rstrip('\n')
line = line.rstrip('\r')
Query["sql"], Query["checkpoint_datetype"],Query["checkpoint_name"] = line.split(";")
sql = Query["sql"]
checkpoint_datetype = Query["checkpoint_datetype"]
checkpoint_name = Query["checkpoint_name"]
if (checkpoint_datetype == "DATETIME"):
sql = sql + " AND " + checkpoint_name + " >= " + "TO_DATE('%s','YYYY-MM-DD HH24:MI:SS') ORDER BY %s" % (cur_checkpoint,checkpoint_name)
if (checkpoint_datetype == "NUMBER"):
sql = sql + " AND " + checkpoint_name + " > " + "%s ORDER BY %s" % (cur_checkpoint,checkpoint_name)
qr.close()
return str(sql)
@Timing
def ReadCheckpointName():
qr = open(query, 'r')
line = qr.readline()
line = line.rstrip('\n')
line = line.rstrip('\r')
Query["sql"], Query["checkpoint_datetype"],Query["checkpoint_name"] = line.split(";")
checkpoint_name = Query["checkpoint_name"]
return str(checkpoint_name)
@Timing
def LocateCheckPoint(description):
description
checkpoint_name = ReadCheckpointName()
#print checkpoint_name
#print description
startcounter = 0
finalcounter = 0
flag = 0
for d in description:
prog = re.compile(checkpoint_name)
result = prog.match(d[0])
startcounter = startcounter + 1
if result:
finalcounter = startcounter - 1
counterstr = str(finalcounter)
print "Checkpoint found in the array position number: " + counterstr
flag = 1
if (flag == 0):
print "Checkpoint did not found"
return finalcounter
@Timing
def DictFactory(description,data):
column_names = [col[0] for col in description]
results = []
for row in data:
results.append(dict(zip(column_names,row)))
return results
@Timing
def WriteCsvLog(log_file,header,data):
checkpoint_index = LocateCheckPoint(header)
file_exists = os.path.isfile(log_file)
with open(log_file,'ab') as csv_file:
headers = [i[0] for i in header]
csv_writer = csv.writer(csv_file,delimiter='|')
if not file_exists:
print "File does not exist, writing new CSV file"
csv_writer.writerow(headers) # Writing headers once
for d in data:
csv_writer.writerow(d)
cur_checkpoint = d[checkpoint_index]
cur_checkpoint = str(cur_checkpoint)
WriteCheckpoint(cur_checkpoint,checkpoint_file)
csv_file.close()
@Timing
def WriteLog(log_file,header,data):
file_exist = os.path.isfile(log_file)
log = open(log_file,'a')
if not file_exist:
print "File does not exist, writing new log file"
open(log_file,'w').close()
mydata = DictFactory(header,data)
checkpoint_name = ReadCheckpointName()
#prin #string = ''
for m in mydata:
for k,v in m.items():
string = string + k + ' = ' + str(v) + ','
if k == checkpoint_name:
#print "KEY FOUND"
cur_checkpoint = v
cur_checkpoint = str(cur_checkpoint)
#print string
string = string + '\n'
print cur_checkpoint
log.write(string + '\n')
WriteCheckpoint(cur_checkpoint,checkpoint_file)
log.close()
@Timing
def WriteCheckpoint(cur_checkpoint,conf_file):
conf = open(conf_file,'w')
conf.write(cur_checkpoint)
conf.close()
@Timing
def GetInfo():
mypool = PoolToDB()
con = mypool.acquire()
cursor = con.cursor()
GetLastCheckpoint()
sql = ReadQuery()
#print sql
cursor.execute(sql)
#data = cursor.fetchall()
#WriteLog(log_file,cursor.description,data)
#WriteCsvLog(log_file,cursor.description,data)
cursor.close()
def __main__():
parser = OptionParser()
parser.add_option("-c","--change- password",dest="pass_to_change",help="Change the password for database connection",metavar="1")
(options, args) = parser.parse_args()
if (options.pass_to_change):
UpdatePassword()
else:
GetInfo()
__main__()
这是一个查询示例:
PS:我真的一直在谷歌和这个论坛上搜索我的问题,但我还没有找到类似的东西。选择COD_EMPRESA,COD_TIPSALDO,COD_INVERSION,COD_CUENTA,COD_OBJETIVO,CAN_CUOTAS,SAL_TOTAL,INT_TOTAL,SAL_RESERVA,APORTE_PROM,SAL_PROM,COSTO_PROM,SAL_TOT_ANTERIOR,FEC_ULT_CALCULO,INCLUIDO_POR,FEC_INCLUSION,MODIFICADO_POR,TO_CHAR(FEC_MODIFICACION,' DD / MM / YYYY HH24:MI:SS')作为FEC_MODIFICACION,CUOTA_COMISION,MONTO_COMISION,SAL_INFORMATIVO,CREDITO_PENDIENTE,DEBITO_PENDIENTE,CAN_CUOTAS_ANTERIOR来自FO.FO_TIPSALDOS_X_CUENTA,其中ROWNUM< = 100000和FEC_INCLUSION> = TO_DATE(' 2005-08-31 11:43:49' YYYY-MM-DD HH24:MI:SS')按FEC_INTRUCTION排序