连接数据框和替换行

时间:2018-08-24 13:22:15

标签: r join

我有一个像这样的表:data1

  V1 V2 V3
1  a  d  t
2  b  d  w
3  c  e  t
4  a  d  t
5  b  f  w
6  c  g  t
7  a  h  w
8  b  k  w
9  c  e  w

和 data2:

  V1 V2 V3
1  a  d  w
2  b  f  t
3  c  e  t

现在,对于在V1和V2中相同的所有行,我想用data2中的V3替换data1中的V3。这样我就得到一个这样的表:

   V1 V2 V3
1  a  d  w
2  b  d  w
3  c  e  t
4  a  d  w
5  b  f  t
6  c  g  t
7  a  h  w
8  b  k  w
9  c  e  t

有什么想法吗?

感谢您的帮助!

3 个答案:

答案 0 :(得分:1)

使用data.table的选项为

library(data.table)
# Convert data1 to a data.table by reference, then run an update join:
# for every row of data1 whose (V1, V2) pair also occurs in data2,
# overwrite V3 with the V3 value coming from data2 (`i.V3`).
setDT(data1)
data1[data2, on = c("V1", "V2"), V3 := i.V3]
data1
#   V1 V2 V3
#1:  a  d  w
#2:  b  d  w
#3:  c  e  t
#4:  a  d  w
#5:  b  f  t
#6:  c  g  t
#7:  a  h  w
#8:  b  k  w
#9:  c  e  t

数据

# Sample input: the 9-row table whose V3 column is to be updated.
data1 <- data.frame(
  V1 = c("a", "b", "c", "a", "b", "c", "a", "b", "c"),
  V2 = c("d", "d", "e", "d", "f", "g", "h", "k", "e"),
  V3 = c("t", "w", "t", "t", "w", "t", "w", "w", "w"),
  stringsAsFactors = FALSE
)

# Sample input: the 3-row lookup table supplying replacement V3 values.
data2 <- data.frame(
  V1 = c("a", "b", "c"),
  V2 = c("d", "f", "e"),
  V3 = c("w", "t", "t"),
  stringsAsFactors = FALSE
)

答案 1 :(得分:0)

尝试此解决方案

您的数据框

# Sample input: the 9-row table whose V3 column is to be updated.
# stringsAsFactors = FALSE is spelled out so the columns are character
# vectors on every R version (the default was TRUE before R 4.0, which
# would turn them into factors and make the later join and assignment
# depend on matching factor levels).
data1 <- data.frame(
  V1 = c("a", "b", "c", "a", "b", "c", "a", "b", "c"),
  V2 = c("d", "d", "e", "d", "f", "g", "h", "k", "e"),
  V3 = c("t", "w", "t", "t", "w", "t", "w", "w", "w"),
  stringsAsFactors = FALSE
)

# Sample input: the 3-row lookup table supplying replacement V3 values.
data2 <- data.frame(
  V1 = c("a", "b", "c"),
  V2 = c("d", "f", "e"),
  V3 = c("w", "t", "t"),
  stringsAsFactors = FALSE
)

使用列 V1 和 V2 将它们连接起来:

library("dplyr")
# Left join on the key columns V1 and V2. Both inputs have a V3 column
# that is not part of the key, so the result carries V3.x (from data1)
# and V3.y (from data2, NA where no match exists).
data3 <- data1 %>%
  left_join(data2, by = c("V1", "V2"))
data3
  V1 V2 V3.x V3.y
1  a  d    t    w
2  b  d    w <NA>
3  c  e    t    t
4  a  d    t    w
5  b  f    w    t
6  c  g    t <NA>
7  a  h    w <NA>
8  b  k    w <NA>
9  c  e    w    t

替换适当的值

# Rows that found a partner in data2 have a non-missing V3.y; for those
# rows, overwrite the original value (V3.x) with the joined one.
has_match <- !is.na(data3[["V3.y"]])
data3[has_match, "V3.x"] <- data3[has_match, "V3.y"]

您的输出

# Show the result without the helper column V3.y (the fourth column).
data3[, c("V1", "V2", "V3.x")]
  V1 V2 V3.x
1  a  d    w
2  b  d    w
3  c  e    t
4  a  d    w
5  b  f    t
6  c  g    t
7  a  h    w
8  b  k    w
9  c  e    t

答案 2 :(得分:0)

我不确定这是否是最高效的答案;如果要扩展到更大的数据框,需要多加小心。由于V3同时存在于data1和data2中,而我们并不用它作为连接键,因此连接时会自动为这两列附加后缀(V3.x和V3.y)以作区分。在这个示例中情况非常简单明了,但在将其用于更复杂的情形之前,请确保您理解它的工作方式。

library(tidyverse)
# Sample input: the 9-row table whose V3 column is to be updated.
# Built with tibble(); data_frame() is deprecated in the tibble package.
data1 <- tibble(
  V1 = rep(c("a", "b", "c"), 3),
  V2 = c("d", "d", "e", "d", "f", "g", "h", "k", "e"),
  V3 = c("t", "w", "t", "t", "w", "t", "w", "w", "w")
)

# Sample input: the 3-row lookup table supplying replacement V3 values.
data2 <- tibble(
  V1 = c("a", "b", "c"),
  V2 = c("d", "f", "e"),
  V3 = c("w", "t", "t")
)

# Left join on V1 and V2; the non-key V3 columns come back as V3.x
# (from data1) and V3.y (from data2). coalesce() takes V3.y when a match
# exists and falls back to V3.x otherwise. transmute() keeps only the
# listed columns, so the suffixed helpers are dropped in the same step.
data1 %>%
  left_join(data2, by = c("V1", "V2")) %>%
  transmute(V1, V2, V3 = coalesce(V3.y, V3.x))

# A tibble: 9 x 3
  V1    V2    V3   
  <chr> <chr> <chr>
1 a     d     w    
2 b     d     w    
3 c     e     t    
4 a     d     w    
5 b     f     t    
6 c     g     t    
7 a     h     w    
8 b     k     w    
9 c     e     t