我将与不同ID关联的序列保存为sqlite3 DB中的两列,其中sequence是一列,而ids_string是另一列。我在从数据库中检索时遇到问题
字典创建为uniqe_sequence = [id列表] 该序列是大约7000个字符或更少的字符串,并且ID列表最多可以包含100万个字符
import sys
from Bio import SeqIO
from Bio.Seq import Seq
import time
import sqlite3
conn = sqlite3.connect('Sausql_012419.db')
c = conn.cursor()
c.execute("create table Sau (sequence text, list_of_ids text)")
for record in sequences_dict_d:
c.execute("insert into Sau values (?,?)", (record,'._/'.join(sequences_dict_d[record])))
conn.commit()
c.execute("SELECT COUNT(*) FROM Saureus_panproteome")
sql_count = c.fetchall()
print("saved sql database of {} proteins in --- {:.3f} seconds ---".format(sql_count[0][0],time.time() - start_time))
c.close()
conn.close()
#retrieval exact sequence match
for record in SeqIO.parse(queryfile, _format):
conn = sqlite3.connect('Sausql_012419.db')
c = conn.cursor()
c.execute('select list_of_ids from Sau where sequence = str(record.seq)')
print(c.fetchall()) # or do something with the list_of_ids