我的df看起来像:
SNP FRQ ACB ASW BEB CDX ACB_alt ASW_alt BEB_alt CDX_alt P EFF
rs10007883 0.3588 0.53645 0.54918 0.19186 0.10752 0.46354 0.45081 0.80813 0.89247 0.013510 -0.000152
rs10009522 0.8654 0.60416 0.47540 0.16279 0.14516 0.39583 0.52459 0.83720 0.85483 0.019823 0.009342
rs10010325 0.5277 0.45833 0.467213 0.45348 0.58602 0.54166 0.53278 0.54651 0.41397 0.9182352 -0.09135
rs10010809 0.3958 0.375 0.401639 0.29069 0.15591 0.62500 0.59836 0.70930 0.84408 0.0001923 -0.09402
rs10015151 0.6939 0.57291 0.44262 0.54651 0.36559 0.42708 0.55737 0.45348 0.63440 0.0192341 0.00012
rs10016978 0.5633 0.5625 0.56557 0.42441 0.16666 0.43750 0.43442 0.57558 0.83333 0.0153223 -0.00543
我想创建一个基于“更接近”测试有条件构建的新数据框。我想测试一下FRQ列是否更接近ACB列或ACB_alt列。如果FRQ更接近ACB列,我希望将列“SNP,FRQ,ACB,ASW,BEB和CDX”保存在新的data.frame中。如果FRQ列更接近ACB_alt,我希望将列“SNP,FRQ,ACB_alt,ASW_alt,BEB_alt和CDX_alt”保留在新数据框中。但是,我希望新数据框中新列的名称保持“ACB,ASW,BEB和CDX”,即使我使用的是alt值。
让我们以前两行为例进行说明。第1行的FRQ为0.3588。由于0.3588比ACB(0.53645)更接近ACB_alt(0.46354),我希望新数据框的第一行来自替代值:
SNP FRQ ACB ASW BEB CDX
rs10007883 0.3588 0.46354 0.45081 0.80813 0.89247
第2行的FRQ为0.8654。由于0.8654比ACB_alt(0.39583)更接近ACB(0.60416),我希望新数据框的第二行来自常规值:
SNP FRQ ACB ASW BEB CDX
rs10007883 0.3588 0.46354 0.45081 0.80813 0.89247
rs10009522 0.8654 0.60416 0.47540 0.16279 0.14516
任何人都可以帮我吗?
编辑:我添加了一个名为“EFF”的列。如果我使用正常值,我希望EFF保持不变。如果我使用替代值,我希望EFF的符号翻转(正负号取反)。
答案 0 :(得分:2)
你可以通过过滤行并重命名列,分别创建一个“普通”数据框和一个“alt”数据框,然后用 `bind_rows` 将这两个数据框合并在一起。
我们在开头添加了 `rowname` 列,因此可以在结尾处恢复原来的行顺序。
library('tidyverse')
# Sample data from the question (FRQ, four population columns and their *_alt
# counterparts).
# NOTE(review): read_table2() is deprecated as of readr 2.0.0 in favour of
# read_table(); it is kept so the snippet still runs on older readr versions.
df <- read_table2("SNP FRQ ACB ASW BEB CDX ACB_alt ASW_alt BEB_alt CDX_alt
rs10007883 0.3588 0.53645 0.54918 0.19186 0.10752 0.46354 0.45081 0.80813 0.89247
rs10009522 0.8654 0.60416 0.47540 0.16279 0.14516 0.39583 0.52459 0.83720 0.85483
rs10010325 0.5277 0.45833 0.467213 0.45348 0.58602 0.54166 0.53278 0.54651 0.41397
rs10010809 0.3958 0.375 0.401639 0.29069 0.15591 0.62500 0.59836 0.70930 0.84408
rs10015151 0.6939 0.57291 0.44262 0.54651 0.36559 0.42708 0.55737 0.45348 0.63440
rs10016978 0.5633 0.5625 0.56557 0.42441 0.16666 0.43750 0.43442 0.57558 0.83333"
)
# Record each row's original position in a `rowname` column so the input order
# can be restored after the rows are split and recombined.
# rownames_to_column() replaces the deprecated dplyr::add_rownames() and yields
# the same character column.
df <- rownames_to_column(df)
# Rows where the regular ACB is at least as close to FRQ as ACB_alt (ties
# included): keep the regular population columns and drop every *_alt column.
df_non_alt <- df %>%
  filter(abs(FRQ - ACB_alt) >= abs(FRQ - ACB)) %>%
  select(-ends_with("_alt"))
df_non_alt
#> # A tibble: 4 x 7
#> rowname SNP FRQ ACB ASW BEB CDX
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2 rs10009522 0.8654 0.60416 0.475400 0.16279 0.14516
#> 2 4 rs10010809 0.3958 0.37500 0.401639 0.29069 0.15591
#> 3 5 rs10015151 0.6939 0.57291 0.442620 0.54651 0.36559
#> 4 6 rs10016978 0.5633 0.56250 0.565570 0.42441 0.16666
# Rows where FRQ is strictly closer to ACB_alt than to ACB: take the values from
# the *_alt columns and rename them back to the plain population names.
df_alt <- df %>%
  filter(abs(FRQ - ACB) > abs(FRQ - ACB_alt)) %>%
  select(rowname, SNP, FRQ, ends_with("_alt")) %>%
  # Remove only the trailing "_alt" suffix; an unanchored pattern would also
  # strip "_alt" from the middle of a column name.
  rename_all(~ sub("_alt$", "", .))
df_alt
#> # A tibble: 2 x 7
#> rowname SNP FRQ ACB ASW BEB CDX
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 rs10007883 0.3588 0.46354 0.45081 0.80813 0.89247
#> 2 3 rs10010325 0.5277 0.54166 0.53278 0.54651 0.41397
# Recombine both subsets and restore the original input order.
# `rowname` is a character column, so it is converted to integer for sorting;
# a plain character sort would place "10" before "2" once there are more than
# nine rows. The helper column is dropped afterwards.
bind_rows(df_non_alt, df_alt) %>%
  arrange(as.integer(rowname)) %>%
  select(-rowname)
#> # A tibble: 6 x 6
#> SNP FRQ ACB ASW BEB CDX
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 rs10007883 0.3588 0.46354 0.450810 0.80813 0.89247
#> 2 rs10009522 0.8654 0.60416 0.475400 0.16279 0.14516
#> 3 rs10010325 0.5277 0.54166 0.532780 0.54651 0.41397
#> 4 rs10010809 0.3958 0.37500 0.401639 0.29069 0.15591
#> 5 rs10015151 0.6939 0.57291 0.442620 0.54651 0.36559
#> 6 rs10016978 0.5633 0.56250 0.565570 0.42441 0.16666
答案 1 :(得分:1)
保罗的代码使用了比我拼凑出来的更好的方法(直到现在我才知道 `filter` :/),但我认为我的做法仍然值得发出来。我用的是比较朴素的方式:逐行循环,并将每一行的结果追加到一个空数据框中。
编辑:我已根据要求添加了“EFF”列,此函数现在会在使用替代值时翻转EFF的符号。
library('tidyverse')
# Sample data from the question plus an EFF column that is 1 for every row, so a
# flipped sign shows up as -1 in the result.
df <- read_table2("SNP FRQ ACB ASW BEB CDX ACB_alt ASW_alt BEB_alt CDX_alt EFF
rs10007883 0.3588 0.53645 0.54918 0.19186 0.10752 0.46354 0.45081 0.80813 0.89247 1
rs10009522 0.8654 0.60416 0.47540 0.16279 0.14516 0.39583 0.52459 0.83720 0.85483 1
rs10010325 0.5277 0.45833 0.467213 0.45348 0.58602 0.54166 0.53278 0.54651 0.41397 1
rs10010809 0.3958 0.375 0.401639 0.29069 0.15591 0.62500 0.59836 0.70930 0.84408 1
rs10015151 0.6939 0.57291 0.44262 0.54651 0.36559 0.42708 0.55737 0.45348 0.63440 1
rs10016978 0.5633 0.5625 0.56557 0.42441 0.16666 0.43750 0.43442 0.57558 0.83333 1")
# Empty result template: it fixes the output column names and order, and rows
# are appended to it one at a time by compareValues().
new_df <- data.frame(
  SNP = character(0),
  FRQ = numeric(0),
  ACB = numeric(0),
  ASW = numeric(0),
  BEB = numeric(0),
  CDX = numeric(0),
  EFF = numeric(0)
)
# Append one input row to the global `new_df`.
#
# row: a one-row data frame with SNP, FRQ, EFF, the population columns
#      (ACB, ASW, BEB, CDX) and their *_alt counterparts.
#
# If FRQ is strictly closer to ACB_alt than to ACB, the *_alt columns are used,
# renamed to the column names of `new_df`, and EFF is negated. Otherwise (ties
# included) the regular columns are taken unchanged. The function's result is
# the updated `new_df`, which is also written back to the enclosing scope.
compareValues <- function(row) {
  frq <- row$FRQ[1]
  use_alt <- abs(frq - row$ACB[1]) > abs(frq - row$ACB_alt[1])
  if (use_alt) {
    picked <- row[, c("SNP", "FRQ", "ACB_alt", "ASW_alt", "BEB_alt", "CDX_alt", "EFF")]
    picked$EFF <- -1 * picked$EFF # Negate EFF
    colnames(picked) <- colnames(new_df)
  } else {
    picked <- row[, colnames(new_df)]
  }
  new_df <<- rbind(new_df, picked)
}
# Call compareValues() once per row of df, in row order; each call appends one
# row to new_df. seq_len() is used instead of 1:nrow(df) so that an empty df
# does not produce the index c(1, 0), and invisible() keeps the by object from
# being auto-printed at top level.
invisible(by(df, seq_len(nrow(df)), compareValues))
print(new_df)
# A tibble: 6 x 7
# SNP FRQ ACB ASW BEB CDX EFF
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 rs10007883 0.359 0.464 0.451 0.808 0.892 -1.00
# 2 rs10009522 0.865 0.604 0.475 0.163 0.145 1.00
# 3 rs10010325 0.528 0.542 0.533 0.547 0.414 -1.00
# 4 rs10010809 0.396 0.375 0.402 0.291 0.156 1.00
# 5 rs10015151 0.694 0.573 0.443 0.547 0.366 1.00
# 6 rs10016978 0.563 0.562 0.566 0.424 0.167 1.00
答案 2 :(得分:1)
使用 `data.table` 包,以最简单的形式解决此问题的另一种方法是:
library(data.table)
# Sample data read from an inline string into a base data.frame, with the first
# line used as the header and strings kept as character.
# NOTE(review): this sample has five rows; rs10015151, which appears in the
# question's table, is not included here.
df <- read.table(text = "SNP FRQ ACB ASW BEB CDX ACB_alt ASW_alt BEB_alt CDX_alt
rs10007883 0.3588 0.53645 0.54918 0.19186 0.10752 0.46354 0.45081 0.80813 0.89247
rs10009522 0.8654 0.60416 0.47540 0.16279 0.14516 0.39583 0.52459 0.83720 0.85483
rs10010325 0.5277 0.45833 0.467213 0.45348 0.58602 0.54166 0.53278 0.54651 0.41397
rs10010809 0.3958 0.375 0.401639 0.29069 0.15591 0.62500 0.59836 0.70930 0.84408
rs10016978 0.5633 0.5625 0.56557 0.42441 0.16666 0.43750 0.43442 0.57558 0.83333", header = TRUE, stringsAsFactors = FALSE)
# Convert to a data.table so the selection below can use dt[, j] syntax.
dt <- data.table(df)
# Decide once per row whether the regular ACB is at least as close to FRQ as
# ACB_alt (ties keep the regular values), then use that flag to pick every
# population column from either the regular or the *_alt set.
dt[, {
  keep_regular <- abs(FRQ - ACB_alt) >= abs(FRQ - ACB)
  list(
    SNP = SNP,
    FRQ = FRQ,
    ACB = ifelse(keep_regular, ACB, ACB_alt),
    ASW = ifelse(keep_regular, ASW, ASW_alt),
    BEB = ifelse(keep_regular, BEB, BEB_alt),
    CDX = ifelse(keep_regular, CDX, CDX_alt)
  )
}]
#Result:
# SNP FRQ ACB ASW BEB CDX
#1: rs10007883 0.3588 0.46354 0.450810 0.80813 0.89247
#2: rs10009522 0.8654 0.60416 0.475400 0.16279 0.14516
#3: rs10010325 0.5277 0.54166 0.532780 0.54651 0.41397
#4: rs10010809 0.3958 0.37500 0.401639 0.29069 0.15591
#5: rs10016978 0.5633 0.56250 0.565570 0.42441 0.16666