我一直没能找到一个恰好能回答我当前问题的已有问答。
DF1
chr position effect.exposure ...
1 12345 A ...
2 54321 G ...
2 6789 C ...
3 9876 D ...
DF2
chr position effect.outcome other ...
1 12345 A C ...
2 54321 T G ...
3 12314 C A ...
5 12321 C D ...
这是我的数据的一般格式,其中有多个其他列与合并无关,但需要保留。
我想要的是合并 "chr" 和 "position" 完全相同的行,同时还要确保 df1 中的 "effect.exposure" 与 df2 中的 "effect.outcome" 或 "other" 相匹配。重要的是,如果 "effect.exposure" 与 "effect.outcome" 和 "other" 都不匹配,我希望删除该行。
"chr" 和 "position" 在结果数据中可以各自合并为一列,但我希望两个 "effect" 列和 "other" 列在最终数据表中保持独立。
更新:
我找到了解决问题的方法:先按 "chr" 和 "position" 两列合并两个数据框。
# Pair up rows of df1 and df2 that share both chromosome and position.
new.df <- merge(x = df1, y = df2, by = c("chr", "position"))
接着,我从合并后的数据框中取出 "effect.exposure" 等于 "effect.outcome" 或 "other" 的子集。
# Keep only the merged rows whose exposure allele equals either the outcome
# allele or the other allele.
# The columns are compared as character so that factor columns with different
# level sets do not make `==` fail, and the mask is wrapped in which() so that
# a comparison evaluating to NA drops the row instead of inserting an all-NA
# row into the result.
final.df <- new.df[which(
  as.character(new.df$effect.exposure) == as.character(new.df$effect.outcome) |
    as.character(new.df$effect.exposure) == as.character(new.df$other)
), ]
坦白说,这可能不是最高效的方法,但它运行得很好。
答案 0 :(得分:0)
这是对其中一个旧答案的扩展:该答案先执行两次 merge,然后用 rbind 把两次合并的结果拼接起来。您的数据的难点在于如何拼接列数不同的结果;可以使用 tidyr::gather 和 tidyr::spread 来处理这个问题。
您的数据
# Example inputs: df1 carries the exposure allele plus an unrelated column,
# df2 carries the outcome allele and the alternative ("other") allele.
df1 <- data.frame(
  chr = c(1L, 2L, 2L, 3L),
  position = c(12345L, 54321L, 6789L, 9876L),
  effect.exposure = c("A", "G", "C", "D"),
  misc = c("a", "b", "c", "d"),
  stringsAsFactors = FALSE
)
df2 <- data.frame(
  chr = c(1L, 2L, 3L, 5L),
  position = c(12345L, 54321L, 12314L, 12321L),
  effect.outcome = c("A", "T", "C", "C"),
  other = c("C", "G", "A", "D"),
  stringsAsFactors = FALSE
)
旧答案的延伸
library(dplyr)
library(tidyr)
# Rows whose exposure allele equals df2's effect.outcome. The join consumes
# effect.outcome as a key, so `other` is the only df2 column carried over.
result1 <- inner_join(
  df1, df2,
  by = c("chr", "position", "effect.exposure" = "effect.outcome")
)
#   chr position effect.exposure misc other
# 1   1    12345               A    a     C

# Rows whose exposure allele equals df2's `other`; here effect.outcome is the
# df2 column that survives the join.
result2 <- inner_join(
  df1, df2,
  by = c("chr", "position", "effect.exposure" = "other")
)
#   chr position effect.exposure misc effect.outcome
# 1   2    54321               G    b              T

# bind_rows() matches columns by name and fills the columns missing from one
# input with NA, so the two results can be stacked directly. This replaces the
# gather()/spread() round trip, which coerced every non-key column to a single
# type and failed on duplicate identifiers.
# Note: a row whose exposure allele equals both effect.outcome and other
# appears once per match.
ans <- bind_rows(result1, result2)
#   chr position effect.exposure misc other effect.outcome
# 1   1    12345               A    a     C           <NA>
# 2   2    54321               G    b  <NA>              T
答案 1 :(得分:0)
希望这有帮助!
library(dplyr)
# Join on chromosome and position, then keep only the rows whose exposure
# allele equals the outcome allele or the other allele.
# The allele columns are compared as character because the sample data stores
# them as factors with different level sets.
final_df <- df1 %>%
  inner_join(df2, by = c("chr", "position")) %>%
  filter(
    as.character(effect_exposure) == as.character(effect_outcome) |
      as.character(effect_exposure) == as.character(other)
  )

# Print the filtered result.
final_df
输出是:
chr position effect_exposure col4 effect_outcome other col5
1 1 12345 A Asdf A C 1234
2 2 54321 G Abc T G 987
# Sample data: console transcript showing dput() of the two input data frames.
# effect_exposure and col4 (df1) and effect_outcome and other (df2) are stored
# as factors; chr, position and col5 are integer vectors.
> dput(df1)
structure(list(chr = c(1L, 2L, 2L, 3L), position = c(12345L,
54321L, 6789L, 9876L), effect_exposure = structure(c(1L, 4L,
2L, 3L), .Label = c("A", "C", "D", "G"), class = "factor"), col4 = structure(c(2L,
1L, 4L, 3L), .Label = c("Abc", "Asdf", "qwerty", "xyz"), class = "factor")), .Names = c("chr",
"position", "effect_exposure", "col4"), class = "data.frame", row.names = c(NA,
-4L))
> dput(df2)
structure(list(chr = c(1L, 2L, 3L, 5L), position = c(12345L,
54321L, 12314L, 12321L), effect_outcome = structure(c(1L, 3L,
2L, 2L), .Label = c("A", "C", "T"), class = "factor"), other = structure(c(2L,
4L, 1L, 3L), .Label = c("A", "C", "D", "G"), class = "factor"),
col5 = c(1234L, 987L, 675L, 3456L)), .Names = c("chr", "position",
"effect_outcome", "other", "col5"), class = "data.frame", row.names = c(NA,
-4L))