我确信我只是犯了一个简单的错误。
我有一个需要逐行处理的大矩阵(3307592 x 9):如果第 8 列(字符串)与第 9 列(字符串)相等(不区分大小写),那么第 3–7 列(0 到 1 之间的数值)需要替换为 1 减去其自身的值。我写的代码是:
# Walk every row of chr2SnpFreqNorm by index (the row count is hard-coded).
for (i in 1:3307592){
# grepl() treats the column-8 value as a regular-expression pattern and
# searches for it inside the column-9 value, ignoring case; this is a
# pattern match, not a strict equality test.
if(grepl(chr2SnpFreqNorm[i,8], chr2SnpFreqNorm[i,9], ignore.case=TRUE)){
# On a match, replace each of columns 3 through 7 in this row with
# 1 minus its current value, one cell at a time.
chr2SnpFreqNorm[i,3] <- 1 - chr2SnpFreqNorm[i,3]
chr2SnpFreqNorm[i,4] <- 1 - chr2SnpFreqNorm[i,4]
chr2SnpFreqNorm[i,5] <- 1 - chr2SnpFreqNorm[i,5]
chr2SnpFreqNorm[i,6] <- 1 - chr2SnpFreqNorm[i,6]
chr2SnpFreqNorm[i,7] <- 1 - chr2SnpFreqNorm[i,7]
}
}
当我尝试执行这段代码时,我的 R 客户端会一直挂起,半小时后我只好取消命令。我不确定自己哪里做错了,因为这段代码在我看来是正确的。
/编辑 示例数据
> chr2SnpFreqNorm[1:10,]
ID pos ceuChr2SnpFreq chsChr2SnpFreq lwkChr2SnpFreq
1 rs187078949 10133 0.070588235 0.000 0.030927835
2 rs191522553 10140 0.005882353 0.000 0.005154639
3 rs149483862 10286 0.100000000 0.135 0.226804124
4 rs150919307 10297 0.147058824 0.070 0.113402062
5 rs186644623 10315 0.000000000 0.000 0.000000000
6 rs193294418 10345 0.017647059 0.000 0.036082474
7 rs185496709 10386 0.082352941 0.020 0.087628866
8 rs188771313 10419 0.229411765 0.085 0.056701031
9 rs192945962 10425 0.100000000 0.020 0.015463918
10 rs184397180 10431 0.064705882 0.005 0.036082474
tsiChr2SnpFreq yriChr2SnpFreq ALT AA
1 0.035714286 0.045454545 A a
2 0.005102041 0.005681818 A C
3 0.239795918 0.170454545 A t
4 0.168367347 0.130681818 T t
5 0.000000000 0.005681818 G C
6 0.030612245 0.028409091 A G
7 0.035714286 0.113636364 T t
8 0.147959184 0.090909091 G G
9 0.091836735 0.034090909 G c
10 0.015306122 0.045454545 T a
>
答案 0 :(得分:2)
首先是您的数据:
# Rebuild the ten-row sample shown in the question as a plain data.frame:
# a character ID, an integer pos, five numeric *Chr2SnpFreq columns and
# two character columns (ALT, AA). Column names come from the list itself.
DF <- structure(
  list(
    ID = c(
      "rs187078949", "rs191522553", "rs149483862", "rs150919307",
      "rs186644623", "rs193294418", "rs185496709", "rs188771313",
      "rs192945962", "rs184397180"
    ),
    pos = c(
      10133L, 10140L, 10286L, 10297L, 10315L,
      10345L, 10386L, 10419L, 10425L, 10431L
    ),
    ceuChr2SnpFreq = c(
      0.070588235, 0.005882353, 0.1, 0.147058824, 0,
      0.017647059, 0.082352941, 0.229411765, 0.1, 0.064705882
    ),
    chsChr2SnpFreq = c(0, 0, 0.135, 0.07, 0, 0, 0.02, 0.085, 0.02, 0.005),
    lwkChr2SnpFreq = c(
      0.030927835, 0.005154639, 0.226804124, 0.113402062, 0,
      0.036082474, 0.087628866, 0.056701031, 0.015463918, 0.036082474
    ),
    tsiChr2SnpFreq = c(
      0.035714286, 0.005102041, 0.239795918, 0.168367347, 0,
      0.030612245, 0.035714286, 0.147959184, 0.091836735, 0.015306122
    ),
    yriChr2SnpFreq = c(
      0.045454545, 0.005681818, 0.170454545, 0.130681818, 0.005681818,
      0.028409091, 0.113636364, 0.090909091, 0.034090909, 0.045454545
    ),
    ALT = c("A", "A", "A", "T", "G", "A", "T", "G", "G", "T"),
    AA = c("a", "C", "t", "t", "C", "G", "t", "G", "c", "a")
  ),
  # Row names are the character labels "1" to "10".
  row.names = as.character(1:10),
  class = "data.frame"
)
现在是一个data.table解决方案:
# Use data.table: `:=` updates the selected rows by reference instead of
# copying the table on every assignment.
library(data.table)
DT <- as.data.table(DF)
# For rows where ALT and AA hold the same letter (case-insensitive), replace
# columns 3 to 7 with 1 minus their current value.
# Note: lapply(.SD, `-`, e2 = 1) would compute x - 1 rather than 1 - x,
# which is why an explicit function is used here.
DT[tolower(ALT) == tolower(AA),
   3:7 := lapply(.SD, function(freq) 1 - freq),
   .SDcols = 3:7]
# DT now holds (rows 1, 4, 7 and 8 were flipped):
#              ID   pos ceuChr2SnpFreq chsChr2SnpFreq lwkChr2SnpFreq tsiChr2SnpFreq yriChr2SnpFreq ALT AA
#  1: rs187078949 10133    0.929411765          1.000    0.969072165    0.964285714    0.954545455   A  a
#  2: rs191522553 10140    0.005882353          0.000    0.005154639    0.005102041    0.005681818   A  C
#  3: rs149483862 10286    0.100000000          0.135    0.226804124    0.239795918    0.170454545   A  t
#  4: rs150919307 10297    0.852941176          0.930    0.886597938    0.831632653    0.869318182   T  t
#  5: rs186644623 10315    0.000000000          0.000    0.000000000    0.000000000    0.005681818   G  C
#  6: rs193294418 10345    0.017647059          0.000    0.036082474    0.030612245    0.028409091   A  G
#  7: rs185496709 10386    0.917647059          0.980    0.912371134    0.964285714    0.886363636   T  t
#  8: rs188771313 10419    0.770588235          0.915    0.943298969    0.852040816    0.909090909   G  G
#  9: rs192945962 10425    0.100000000          0.020    0.015463918    0.091836735    0.034090909   G  c
# 10: rs184397180 10431    0.064705882          0.005    0.036082474    0.015306122    0.045454545   T  a
答案 1 :(得分:1)
`for` 循环在 R 中效率不高,下面是使用 `apply` 和条件索引的解决方案:
## create some toy data: 300 random numbers followed by 200 allele letters,
## laid out column-wise in a 100 x 5 matrix; mixing numbers and letters in
## one vector makes this a character matrix
data <- matrix(
  c(
    runif(300),
    sample(
      c("A", "G", "C", "T", "a", "c", "g", "t"),
      size = 200, replace = TRUE
    )
  ),
  nrow = 100, ncol = 5
)

## Flip a single value.
##   x: one value, either numeric or a character string.
## Returns 1 - x (as a number) when x parses as a number no greater than 1;
## otherwise returns x unchanged.
flip_allele_freqs <- function(x) {
  ## allele letters cannot be parsed; the NA (and its coercion warning) is
  ## the expected signal for "this is a string", so the warning is silenced
  n <- suppressWarnings(as.numeric(x))
  if (is.na(n)) {
    return(x)
  }
  ## `<=` rather than `<` so that a value of exactly 1 flips to 0
  if (n <= 1) 1 - n else x
}

## apply the flip function to the rows where the two last columns are equal
## (ignoring case) and fold the new data back into the old matrix
same_allele <- toupper(data[, 5]) == toupper(data[, 4])
if (any(same_allele)) {
  ## drop = FALSE keeps a matrix even when only one row matches, which
  ## apply() needs in order to iterate over both margins
  data[same_allele, ] <- apply(
    data[same_allele, , drop = FALSE], c(1, 2), flip_allele_freqs
  )
}
祝GWAS好运!
答案 2 :(得分:1)
在 base R 中你可以这样做:
# grepl() takes a single pattern, so Vectorize() is used to test each row's
# column-8 value (as a pattern) against that row's column-9 value.
flip <- Vectorize(grepl)(chr2SnpFreqNorm[, 8], chr2SnpFreqNorm[, 9],
                         ignore.case = TRUE)
# Replace columns 3 to 7 of the matching rows with 1 minus their value.
chr2SnpFreqNorm[flip, 3:7] <- 1 - chr2SnpFreqNorm[flip, 3:7]
这可能有点慢,因为 `Vectorize` 内部隐藏了一个循环。但是,如果您只需要翻转第 8 列和第 9 列在忽略大小写后完全相同的行,那么请使用此过滤器:
# Logical mask: TRUE for rows whose column-8 and column-9 values are equal
# once both are folded to lower case.
flip <- casefold(chr2SnpFreqNorm[, 8]) == casefold(chr2SnpFreqNorm[, 9])