我必须阅读一些文件,并将有关这些文件的一些信息放入MySQL
数据库。我有82个文件。我想同时阅读多个文件。为此,我有以下功能定义:
def sql_processes(db1, infile_name, cursor, z):
print infile_name
PrintLog("Adding " + infile_name + "to MySQL...")
vcf_reader = vcf.Reader(open(infile_name, 'r'))
for record in vcf_reader:
snp_position='_'.join([record.CHROM, str(record.POS)])
ref_F = float(record.INFO['DP4'][0])
ref_R = float(record.INFO['DP4'][1])
alt_F = float(record.INFO['DP4'][2])
alt_R = float(record.INFO['DP4'][3])
AF = (alt_F+alt_R)/(alt_F+alt_R+ref_F+ref_R)
sql_test_query = "SELECT * from snps where snp_pos='" + snp_position + "'"
try:
sql_insert_table = "INSERT INTO snps (snp_pos, " + str(z) + "g) VALUES ('" + snp_position + "', " + str(AF) + ")"
cursor.execute(sql_insert_table)
except db1.IntegrityError, e:
sql_insert_table = "UPDATE snps SET " + str(z) + "g=" + str(AF) + " WHERE snp_pos='" + snp_position + "'";
cursor.execute(sql_insert_table)
db1.commit()
PrintLog("Added " + infile_name + "to MySQL!")
def extractAF(files_vcf):
z=6
snp_dict=[]
#First connection
#db1 = MS.connect(host="localhost",user="root",passwd="sequentia2")
#cursor = db1.cursor()
#sql_create_db = "CREATE DATABASE SUPER_SNP_calling"
#cursor.execute(sql_create_db)
#db1.commit()
#db1.close()
#Second connection once we have created the db
db1 = MS.connect(host="localhost",user="root",passwd="sequentia2",db="SUPER_SNP_calling")
cursor = db1.cursor()
#sql_create_table = "CREATE TABLE snps (snp_pos VARCHAR(40) PRIMARY KEY"
#for num in range(0, len(files_vcf)):
# sql_create_table = sql_create_table + ", " + str(num) + "g FLOAT(4,3)"
#sql_create_table = sql_create_table + ")"
#cursor.execute(sql_create_table)
#db1.commit()
threads = []
for infile_name in sorted(files_vcf):
vcf_reader = vcf.Reader(open(infile_name, 'r'))
t = Thread(target = sql_processes, args = (db1, infile_name, cursor, z)).start()
threads.append(t)
z+=1
count_t = 1
my_threads = []
for t in threads:
t.start()
my_threads.append(t)
if count_t == 8:
for x in my_threads:
x.join()
my_threads = []
count_t = 0
count_t+=1
db1.close()
return snp_dict #this is empty, I should solve this.
但是,我认为这是有问题的部分:
count_t = 1
my_threads = []
for t in threads:
t.start()
my_threads.append(t)
if count_t == 8:
for x in my_threads:
x.join()
my_threads = []
count_t = 0
count_t+=1
db1.close()
我想同时阅读8个文件。然后,等到8个进程完成,以便开始接下来的8个进程。但是这会引发以下错误:
Exception in thread Thread-1:
Traceback (most recent call last):
File "/usr/lib/python2.7/threading.py", line 810, in __bootstrap_inner
self.run()
File "/usr/lib/python2.7/threading.py", line 763, in run
self.__target(*self.__args, **self.__kwargs)
File "./SUPER_mysql4.py", line 460, in sql_processes
db1.commit()
InterfaceError: (0, '')
我该如何纠正?