R - 比较两个长度不同的数据框(data frame)以替换 NA 值

时间:2017-06-01 01:47:33

标签: r

我有两个数据框(df1 和 df2):

# Question data. Every value is written as a quoted string, so "NA" here is
# the literal text "NA" rather than a true missing value (is.na() is FALSE
# for it).
df1 <- data.frame(name = c("A","B","D","F","H"), value=c("17","NA","12","NA","NA"))
# Lookup table: holds a value for every name, including all names used in df1.
df2 <- data.frame(name = c("A","B","C","D","E","F","G","H","I"), value=c("17","100","11","12","8","55","109","29","848"))

#df1
head(dput(df1))
structure(list(name = structure(1:5, .Label = c("A", "B", "D", 
"F", "H"), class = "factor"), value = structure(c(2L, 3L, 1L, 
3L, 3L), .Label = c("12", "17", "NA"), class = "factor")), .Names = c("name", 
"value"), row.names = c(NA, -5L), class = "data.frame")
  name value
1    A    17
2    B    NA
3    D    12
4    F    NA
5    H    NA


#df2
head(dput(df2))
structure(list(name = structure(1:9, .Label = c("A", "B", "C", 
"D", "E", "F", "G", "H", "I"), class = "factor"), value = structure(c(5L, 
1L, 3L, 4L, 8L, 7L, 2L, 6L, 9L), .Label = c("100", "109", "11", 
"12", "17", "29", "55", "8", "848"), class = "factor")), .Names = c("name", 
"value"), row.names = c(NA, -9L), class = "data.frame")
  name value
1    A    17
2    B   100
3    C    11
4    D    12
5    E     8
6    F    55

基本上,我需要按 name 进行匹配,用 df2 中对应的实际值替换 df1 中所有的 NA 值,从而得到一个新的 df1:

#newdf1
      name value
    1    A    17
    2    B    100
    3    D    12
    4    F    55
    5    H    29

非常感谢! :)

3 个答案:

答案 0 :(得分:1)

以下是使用包dplyr的示例。 df3是最终输出。

# Load packages
library(dplyr)

# Create example data frames.
# tibble() replaces the deprecated data_frame() constructor.
df1 <- tibble(name = c("A", "B", "D", "F", "H"),
              value = c(17, NA, 12, NA, NA))
df2 <- tibble(name = c("A", "B", "C", "D", "E", "F", "G", "H", "I"),
              value = c(17, 100, 11, 12, 8, 55, 109, 29, 848))

df3 <- df1 %>%
  # Look up df2's value for every name in df1; the clashing `value` columns
  # come back as value.x (from df1) and value.y (from df2)
  left_join(df2, by = "name") %>%
  # Keep df1's value where it is present, otherwise fall back to df2's
  mutate(value = coalesce(value.x, value.y)) %>%
  select(name, value)

答案 1 :(得分:1)

# Flag the rows of df1 whose value is missing.
is_missing <- is.na(df1$value)
# Build a lookup vector: df2's values keyed by df2's names.
lookup <- setNames(df2$value, df2$name)
# Fill each missing value with the df2 entry that carries the same name.
df1$value[is_missing] <- lookup[as.character(df1$name[is_missing])]

答案 2 :(得分:1)

我们可以按 'name' 列进行连接(join,on = 'name'):

library(data.table)
# as.data.table() returns copies, so neither df1 nor df2 is converted to a
# data.table by reference as a side effect.
newdf1 <- as.data.table(df1)
lookup <- as.data.table(df2)
# Update join on "name": inside j, `value` is newdf1's column and `i.value`
# is the matching value from lookup. fcoalesce() keeps existing values and
# only fills the ones that are NA.
newdf1[lookup, value := fcoalesce(value, i.value), on = "name"]
newdf1
#   name value
#1:    A    17
#2:    B   100
#3:    D    12
#4:    F    55
#5:    H    29

数据

# Example data with true (numeric) NA values.
# Built with base data.frame() instead of the deprecated data_frame(), so the
# snippet runs without loading tibble/dplyr. stringsAsFactors = FALSE keeps
# `name` as character on R versions older than 4.0 as well.
df1 <- data.frame(name = c("A", "B", "D", "F", "H"),
                  value = c(17, NA, 12, NA, NA),
                  stringsAsFactors = FALSE)
df2 <- data.frame(name = c("A", "B", "C", "D", "E", "F", "G", "H", "I"),
                  value = c(17, 100, 11, 12, 8, 55, 109, 29, 848),
                  stringsAsFactors = FALSE)