我正在编写一个脚本来创建用于分析的csv文件。运行脚本时,它为OPA 5701提供1个csv文件,为6561提供1个。这是脚本的两个部分之间的唯一区别。
##Samplesheet for GS0005701
rows<-unique(samples$Sample_Name)
samplesheet<-rows
opa.panels<-sort(unique(samples$Pool_ID))
for ( i in 1:length(opa.panels)){
samps<-samples[samples$Pool_ID == opa.panels[i],]
idx<-match(samps$Sample_Name,rows)
samplesheet<-cbind(samplesheet,samps$Sentrix_ID[idx],samps$Sentrix_Position[idx])
}
colnames(samplesheet)[2:(length(opa.panels)*2+1)]<-c("SentrixBarcode_A","SentrixPosition_A","SentrixBarcode_B","SentrixPosition_B","SentrixBarcode_C","SentrixPosition_C","SentrixBarcode_D","SentrixPosition_D")[1:(length(opa.panels)*2)]
colnames(samplesheet)[1]<-"Sample_Name"
idx<-match(rows,samples$Sample_Name)
samplesheet<-cbind(samplesheet,samples[idx,c("Sample_Group","NorTum","Sample")])
ss_header<-c("[Header]","Investigator Name,Sander","Project Name,HNPCC_NA_MYH","Experiment Name,OPA1+2+3+4","Date,5062012","[Manifests]")
for (i in 1:length(opa.panels)) ss_header<-c(ss_header,paste(LETTERS[i],opa.panels[i],sep=","))
ss_header<-c(ss_header,"[Data]")
writeLines(ss_header,"Samplesheet5701.csv")
write.table(samplesheet,file="Samplesheet5701.csv",sep=",",row.names=FALSE,quote=FALSE,append=TRUE,na="")
##Samplesheet for GS0006561-OPA
rows2<-unique(samples2$Sample_Name)
samplesheet2<-rows2
opa.panels2<-sort(unique(samples2$Pool_ID))
for ( j in 1:length(opa.panels2)){
samps2<-samples2[samples2$Pool_ID == opa.panels2[j],]
idx2<-match(samps2$Sample_Name,rows2)
samplesheet2<-cbind(samplesheet2,samps2$Sentrix_ID[idx2],samps2$Sentrix_Position[idx2])
}
colnames(samplesheet2)[2:(length(opa.panels)*2+1)]<-c("SentrixBarcode_A","SentrixPosition_A","SentrixBarcode_B","SentrixPosition_B","SentrixBarcode_C","SentrixPosition_C","SentrixBarcode_D","SentrixPosition_D")[1:(length(opa.panels)*2)]
colnames(samplesheet2)[1]<-"Sample_Name"
idx2<-match(rows2,samples2$Sample_Name)
samplesheet2<-cbind(samplesheet2,samples2[idx2,c("Sample_Group","NorTum","Sample")])
ss_header<-c("[Header]","Investigator Name,Sander","Project Name,HNPCC_NA_MYH","Experiment Name,OPA1+2+3+4","Date,5062012","[Manifests]")
for (j in 1:length(opa.panels2)) ss_header<-c(ss_header,paste(LETTERS[j],opa.panels2[j],sep=","))
ss_header<-c(ss_header,"[Data]")
writeLines(ss_header,"samplesheet6561.csv")
write.table(samplesheet2,file="Samplesheet6561.csv",sep=",",row.names=FALSE,quote=FALSE,append=TRUE,na="")
## Samplesheet GS0005701部件创建data.frame
。而## Samplesheet GS0006561创建matrix
。使用相同的代码和相同的输入数据。
输入数据如下所示:
复制粘贴:
Sample Sample_Name Sample_Group NorTum Sentrix_ID Sentrix_Position Pool_ID Folderdate
1 00-04193 00-04193N HNPCC_UV N 1495421 R007_C012 GS0006564-OPA Exp060410
2 00-04193 00-04193N HNPCC_UV N 1495447 R007_C012 GS0006562-OPA Exp060410
3 00-04193 00-04193N HNPCC_UV N 1495447 R007_C006 GS0006561-OPA Exp060410
4 00-04193 00-04193N HNPCC_UV N 1495421 R007_C006 GS0006563-OPA Exp060410
5 00-04193 00-04193N HNPCC_UV N 1460498 R007_C005 GS0006561-OPA Exp060516
6 00-04193 00-04193N HNPCC_UV N 1460498 R007_C012 GS0006564-OPA Exp060516
我知道这是一个难以回答的问题,但我希望有人可以给我一个关于1代码如何创建data.frame而另一个是矩阵的提示。
非常感谢提前!
答案 0 :(得分:1)
这个问题的正确答案是索引部分的交换。
idx<-match(samps$Sample_Name,rows)
更改为:
idx<-match(rows,samps$Sample_Code)
因此行的长度与Sample_Code的长度相同。