我有一个R脚本,用于使用Hudson估算器从次要等位基因频率计算Fst。当我运行脚本时,我在第74行收到以下错误消息(参见下面的代码):
Error in if (p < 2e-16) output <- list() :
missing value where TRUE/FALSE needed
由于我是R和编程的新手,我不确切地知道问题出在哪里或者应该放置TRUE / FALSE。欢迎任何帮助或建议!
# Code for calculating Hudson's Fst from minor allele frequencies:
# input data frame pop1 is a N x 4 matrix
# where N is the number of SNPs
# row names correspond to the SNP name
# MAF represent the minor allele frequency
# NCHROBS represent the number of chromosome observed (2 x sample size)
# A1 common allele
# A2 variant allele
# example
"""
> head(pop1a,5)
A1_maj A2_min MAF NCHROBS
rs3094315 G A 0.18590 156
rs3131972 A G 0.18350 158
rs3115860 C A 0.13160 152
rs12562034 A G 0.09615 156
rs12124819 G A 0.20950 148
rs2980300 A G 0.13290 158
"""
# similarly for pop2a:
Hudson.Fst <- function(pop1a,pop2a,call.rate = 0.95,top.number = 10){
# remove the SNPs that are not in common between the 2 populations
snp.to.keep <- intersect(row.names(pop1a),row.names(pop2a))
if (length(snp.to.keep) == 0){print("Error: no SNP in common");return(NULL)}
pop1a.k <- pop1a[snp.to.keep,]
pop2a.k <- pop2a[snp.to.keep,]
# change the reference allele if is not concordant between the 2 populations
if (sum(pop1a.k$A1_maj == pop2a.k$A1_maj) != length(snp.to.keep)){
idx <- which(pop1a.k$A1_maj != pop2a.k$A1_maj)
idx.rev <- which(pop1a.k$A1_maj != pop2a.k$A1_maj & pop1a.k$A1_maj == pop2a.k$A2_min)
idx.rm <- which(pop1a.k$A1_maj != pop2a.k$A1_maj & pop1a.k$A1_maj != pop2a.k$A2_min)
if(length(idx.rev) > 0){
provv <- pop1a.k$A1_maj[idx.rev]
pop1a.k$A1_maj[idx.rev] <- pop1a.k$A2_min[idx.rev]
pop1a.k$A2_min[idx.rev] <- provv
pop1a.k$MAF[idx.rev] <- 1 - pop1a.k$MAF[idx.rev]
}
if(length(idx.rm) > 0){
pop1a.k <- pop1a.k[-idx.rm,]
pop2a.k <- pop2a.k[-idx.rm,]}}
# remove SNPs with low call rate in one or both populations
N1 <- pop1a.k$NCHROBS
N2 <- pop1a.k$NCHROBS
idx.rm.pop1a <- which(N1 < max(N1)*call.rate)
idx.rm.pop2a <- which(N2 < max(N2)*call.rate)
idx.rm.all <- union(idx.rm.pop1a,idx.rm.pop2a)
pop1a.k <- pop1a.k[-idx.rm.all,]
pop2a.k <- pop2a.k[-idx.rm.all,]
# compute Hudson SNP_Fst and global Fst estimators
p1 <- pop1a.k$MAF
p2 <- pop2a.k$MAF
n1 <- pop1a.k$NCHROBS
n2 <- pop2a.k$NCHROBS
fst.N <- (p1 - p2)^2 - p1*(1-p1)/(n1-1) - p2*(1-p2)/(n2-1)
fst.D <- p1*(1-p2) + p2*(1-p1)
Fst.v <- fst.N/fst.D
names(Fst.v) <- row.names(pop1a.k[-idx.rm.all,])
Fst.o <- Fst.v[order(Fst.v,decreasing=TRUE)]
mu1 <- mean(fst.N)
mu2 <- mean(fst.D)
se1 <- sd(fst.N)/sqrt(length(fst.N))
se2 <- sd(fst.D)/sqrt(length(fst.D))
F.global <- mu1/mu2
se.F <- sqrt(se1^2+se2^2)
F_L95 <- F.global - 1.96*se.F
F_U95 <- F.global + 1.96*se.F
Z <- F.global/se.F
p <- 2*(1 - pnorm(Z))
if(p < 2e-16) {p <- "less than 2e-16"}
# Error in if (p < 2e-16) { : missing value where TRUE/FALSE needed
output[[1]] <- c(F.global,F_L95,F_U95,p)
names(output[[1]]) <- c("Hudson.Fst","L.95%.CI","U.95%.CI","p.val")
output[[2]] <- data.frame(Fst.o[1:top.number])
names(output[[2]]) <- c("Hudson.Fst")
return(output)}
# Run Fst for all columns
data1 <- Hudson.Fst(pop1a,pop2a,call.rate = 0.95, top.number = 10)