我正在尝试编写一个程序来重新排列(洗牌)DNA序列,以打乱序列中原有的顺序。我已经写了MATLAB代码,但它运行得太慢。我也在考虑使用汉明距离(Hamming distance)或莱文斯坦距离(Levenshtein distance)作为度量;我该如何结合这些度量来确保洗牌足够充分?我在洗牌时遵循以下规则:
- 规则1:洗牌后,第i个残基不应与原来的第i-1、i-2、i-3、i+1、i+2、i+3个残基相邻。
- 规则2:在新的排列中,残基的新位置与旧位置必须相差20位以上。例如,如果A位于字符串的第1个位置,洗牌后它的位置必须大于21。
function seq=shuffling(str)
%SHUFFLING Randomly reorder a sequence by rejection sampling.
%   seq = SHUFFLING(str) repeatedly draws a random permutation p of the
%   indices 1:length(str) until one passes every check below, then returns
%   str(p).
%
%   Checks applied to a candidate p (p(k) is the original index placed at
%   new position k):
%     * no element keeps its original index (p(k) ~= k for every k);
%     * rule 1: neighbouring entries of p never differ by 1..4 in absolute
%       value;
%     * rule 2: the fraction of positions with abs(p(k)-k) > 20 is strictly
%       greater than t1, where t1 is 0.4 by default, 0.7 for
%       length(str) >= 100 and 0.90 for length(str) > 150.
%
%   Throws 'shuffling:infeasible' when rule 2 provably cannot be met for
%   the given length (this includes empty and very short inputs), instead
%   of looping forever. Inputs that pass this necessary check may still
%   need a very large number of random draws before one is accepted.
%
%   NOTE(review): the written description of rule 1 mentions offsets up to
%   3, while the check rejects neighbour differences up to 4. The code's
%   behaviour is kept as is -- confirm which one is intended.
len=length(str);
t1=0.4;
if(len>150)
    t1=0.90;
elseif(len>=100)
    t1=0.7;
end
seqlen=1:len;

% Upper bound on how many positions can satisfy rule 2 at all: position k
% can receive an element from more than 20 places away only if an index
% k+21 or k-21 exists.
reachable=sum(seqlen+21<=len | seqlen-21>=1);
% Written as ~(x > t1) so that the NaN produced by 0/0 for an empty input
% is also rejected here.
if ~(reachable/len>t1)
    error('shuffling:infeasible', ...
        'No permutation of a sequence of length %d can satisfy rule 2 (threshold %g).', ...
        len,t1);
end

while 1
    shufseq=randperm(len);
    shift=shufseq-seqlen; % displacement of every position: original index minus new index
    if any(shift==0)
        continue % at least one element did not move
    end
    % Entries of a permutation are distinct, so every gap is >= 1 and
    % "gap <= 4" is the same as "gap is one of 1,2,3,4".
    gap=abs(diff(shufseq));
    if any(gap<=4)
        continue % rule 1 violated
    end
    if (sum(abs(shift)>20)/len)>t1
        break % rule 2 satisfied
    end
end
seq=str(shufseq);
答案 0 :(得分:0)
function seq=newshuffle(str)
%#codegen
%NEWSHUFFLE Rebuild a DNA string in random order with the same base counts.
%   seq = NEWSHUFFLE(str) returns a 1-by-length(str) char row made of the
%   letters 'A', 'C', 'G' and 'T', containing each base exactly as many
%   times as it occurs in str. Letters are counted case-insensitively and
%   the output is upper case.
%
%   At every output position one base is drawn uniformly among the base
%   TYPES that still have copies left (not weighted by how many copies are
%   left), so the result is not a uniformly random permutation of str.
%
%   Throws 'newshuffle:invalidSymbol' when str contains any character other
%   than A, C, G or T; such characters are not in the dictionary, so there
%   would not be enough bases to fill every output position.
len=length(str);
ndict= ['A';'C';'G';'T'];

% Count each base directly instead of relying on a toolbox counter and the
% undocumented struct2array helper.
upperStr=upper(str);
ncomp=zeros(4,1);
for k=1:4
    ncomp(k)=sum(upperStr(:)==ndict(k));
end
if sum(ncomp)~=len
    error('newshuffle:invalidSymbol', ...
        'Input must contain only the characters A, C, G and T.');
end

seq=repmat(' ',1,len); % preallocate instead of growing inside the loop
for l=1:len
    % Pick directly among the bases that are still available; same
    % distribution as redrawing randi(4) until a non-exhausted base is hit,
    % but without the retry loop.
    avail=find(ncomp~=0);
    x=avail(randi(numel(avail),1,1));
    seq(l)=ndict(x);
    ncomp(x)=ncomp(x)-1;
end
end