接着我的上一个问题(Matching two string lists that partially match into another list),我又遇到了另一个问题。我先贴出目前为止的代码:
#!/usr/bin/env python3.4
# -*- coding: utf-8 -*-
import random
import timeit
def generateSequences(n, length=50):
    """
    Generates a list of random DNA sequences.
    Arguments:
    n: int (or anything int() accepts), number of sequences to generate
    length: int, number of bases in each sequence (default 50)
    Returns:
    RandomSequences: list of str, n strings of `length` characters, each
    character picked with random.choice from "A", "G", "C", "T"
    """
    dna = ["A", "G", "C", "T"]
    # "".join avoids rebuilding the string on every `+=`. random.choice is
    # still called once per base, in the same order as a nested loop would.
    return [
        "".join(random.choice(dna) for _ in range(length))
        for _ in range(int(n))
    ]
def generatePrefixes(p, RandomSequences, prefixSize=20):
    """
    Cuts a prefix from every sequence and draws p of those prefixes at random.
    Arguments:
    p: int, number of random prefixes to draw (with replacement)
    RandomSequences: list of str, sequences to take prefixes from
    prefixSize: int, number of leading characters kept per sequence (default 20)
    Returns:
    First20Chars: list of str, the first `prefixSize` characters of each sequence
    RandomChoices: list of str, p prefixes picked with random.choice
    Raises:
    IndexError: from random.choice, if RandomSequences is empty and p > 0
    """
    First20Chars = [seq[:prefixSize] for seq in RandomSequences]
    RandomChoices = [random.choice(First20Chars) for _ in range(p)]
    return First20Chars, RandomChoices
def searchReadsInList(RandomSequences, RandomChoices):
    """
    Collects every sequence that contains one of the chosen prefixes.
    Arguments:
    RandomSequences: list of str, sequences to search through
    RandomChoices: list of str, strings to look for inside the sequences
    Returns:
    list of str, one entry per (choice, sequence) pair where the choice
    occurs anywhere in the sequence, ordered by choice first, then by sequence
    float, seconds spent searching as measured by timeit.default_timer
    """
    began = timeit.default_timer()
    hits = [
        read
        for wanted in RandomChoices
        for read in RandomSequences
        if wanted in read
    ]
    elapsed = timeit.default_timer() - began
    return hits, elapsed
def makeSuffixDict(reads, extSize = 30, verbose = True):
    """
    Generates a dictionary with read-suffixes as values from a list of reads.
    Each read is split into a prefix (everything except the last extSize
    characters), used as the key, and a suffix (the last extSize characters),
    appended to the list stored under that key.
    Arguments:
    reads: list of str, reads for generating the dictionary
    extSize: int, length of suffixes stored as values in dict
    verbose: bool, whether to print feedback about results
    Returns:
    suffixDict: dict, dictionary with read-suffixes as values
    Note:
    With extSize = 0 the slice read[0:-0] is empty, so the key becomes ""
    and the whole read is stored as the suffix.
    """
    # Named suffixDict so the builtin `dict` is not shadowed.
    suffixDict = {}
    for read in reads:
        prefix = read[0:-extSize]
        suffix = read[-extSize:]
        suffixDict.setdefault(prefix, []).append(suffix)
    if verbose:
        # A prefix is unambiguous when exactly one suffix was recorded for it.
        # Counting from the finished dictionary keeps the tally tied to the
        # prefix keys, never to suffix strings.
        unambiguous = sum(1 for suffixes in suffixDict.values()
                          if len(suffixes) == 1)
        ambiguous = len(suffixDict) - unambiguous
        print("Reads: ", len(reads), "\n",
              "Keys: ", len(suffixDict), "\n",
              "Unambiguous: ", unambiguous, "\n",
              "Ambiguous: ", ambiguous, sep = "")
    return suffixDict
def searchReadsInDict(RandomSequences, RandomChoices):
    """
    Looks up each chosen prefix in the suffix dictionary and rebuilds the reads.
    Arguments:
    RandomSequences: list of str, reads passed to makeSuffixDict
    RandomChoices: list of str, prefixes to look up as dictionary keys
    Returns:
    Matches_RC_Dict: list of str, prefix + suffix for every suffix stored
    under each chosen prefix; prefixes that are not keys contribute nothing
    """
    # makeSuffixDict returns the dictionary, so the result has to be captured.
    # Iterating the bare name `dict` would iterate the builtin type instead.
    suffixDict = makeSuffixDict(RandomSequences)
    Matches_RC_Dict = []
    for prefix in RandomChoices:
        # Direct key lookup replaces a scan over all keys; .get avoids a
        # KeyError for a prefix that never occurred.
        for suffix in suffixDict.get(prefix, []):
            Matches_RC_Dict.append(prefix + suffix)
    return Matches_RC_Dict
if __name__ == "__main__":
    # Demo run: generate 15 random reads and show them.
    RandomSequences = generateSequences(15)
    print("genseq", RandomSequences)

    # Take the prefix of every read and draw 5 of those prefixes at random.
    First20Chars, RandomChoices = generatePrefixes(5, RandomSequences)
    print("genpre1", First20Chars)
    print("genpre2", RandomChoices)

    # Search by scanning the list of reads and report the time taken.
    Matches_RS_RC, elapsed = searchReadsInList(RandomSequences, RandomChoices)
    print("searchList", Matches_RS_RC)
    print("Time elapsed", elapsed)

    # Search through the prefix -> suffixes dictionary.
    Matches_RC_Dict = searchReadsInDict(RandomSequences, RandomChoices)
    print("SearchDict", Matches_RC_Dict)
让我感到困扰的是 searchReadsInDict。在 searchReadsInDict 中,我需要使用已提供的 makeSuffixDict 来创建字典。然后我需要将 generatePrefixes 随机选出的字符串与字典中的键进行匹配,再把匹配到的键和对应的值拼接成一个字符串并放入一个列表中。关于如何把键和值拼成一个字符串,我找到了几种方法,它们单独使用时都能工作,但一旦集成到函数里就总是报错,而且我也不确定应该把它们放在函数中的哪个位置。
另一个问题是,即使按现在的样子运行,Matches_RC_Dict 的输出也是 []。
希望有人能提供帮助。
答案 0 :(得分:0)
经 freenode 上 #python 频道的 cyphase 许可,我在此贴出他给出的出色答案,以免其他人做重复的工作:
def searchReadsInDict(RandomSequences, RandomChoices):
    """
    Rebuilds full reads for the chosen prefixes via the suffix dictionary.
    Arguments:
    RandomSequences: list of str, reads passed to makeSuffixDict
    RandomChoices: list of str, prefixes looked up as dictionary keys
    Returns:
    list of str, prefix + suffix for every suffix stored under each prefix
    float, seconds elapsed (dictionary construction included) as measured
    by timeit.default_timer
    Raises:
    KeyError: if a chosen prefix is not a key of the dictionary
    """
    began = timeit.default_timer()
    suffixes_by_prefix = makeSuffixDict(RandomSequences)
    rebuilt = []
    for prefix in RandomChoices:
        for tail in suffixes_by_prefix[prefix]:
            rebuilt.append(prefix + tail)
    elapsed_sRD = timeit.default_timer() - began
    return rebuilt, elapsed_sRD