我需要在R中合并两个表。
表X看起来像这样:
company_name country_code country cost1 cost2
1 Test1 FR <NA> NA 9.945000e-02
2 Test1 BR Brazil NA NA
3 Test2 <NA> USA 1 1.053000e-01
表Y看起来像这样:
country country_code tier
France FR 1
Brazil BR 2
USA US 1
我需要合并X和Y来获得Z:
name country_code tier
Test1 FR 1
Test2 BR 2
....
我该如何按 OR 条件(即 country_code 或 country 任意一列匹配即可)或用其他方法来合并这两个表?
答案 0 :(得分:1)
下面的代码可以实现这一点。请注意,我使用了 zoo 包中的函数,因此您需要先安装该包。
# Full outer join of the two tables on the columns they have in common.
m <- merge(x = df1, y = df2, all = TRUE)
# Carry the last observed country forward over missing entries.
m[["country"]] <- zoo::na.locf(m[["country"]])
# Per country: fill gaps from the last row upwards, then from the first row
# downwards, so every row of a group ends up carrying the known values.
m <- lapply(
  split(m, m[["country"]]),
  function(grp) zoo::na.locf(zoo::na.locf(grp, fromLast = TRUE))
)
m <- do.call(rbind, m)
# Drop repeated rows, then keep columns 3, 2 and 4
# (name, country_code, tier in the output printed below).
m <- unique(m)[, c(3, 2, 4)]
row.names(m) <- NULL
m
# name country_code tier
#1 First FR 1
#2 Third US 1
#3 Second BR 2
**数据。**
# Sample input `df1`: three rows, all columns are factors; `country` and
# `country_code` each contain one missing value.
df1 <- data.frame(
  name = factor(
    c("First", "Second", "Third"),
    levels = c("First", "Second", "Third")
  ),
  country = factor(c("France", NA, "USA"), levels = c("France", "USA")),
  country_code = factor(c(NA, "BR", "US"), levels = c("BR", "US"))
)
# Sample lookup table `df2`: one row per country with its code (both factors)
# and an integer tier.
df2 <- data.frame(
  country = factor(
    c("France", "Brazil", "USA"),
    levels = c("Brazil", "France", "USA")
  ),
  country_code = factor(c("FR", "BR", "US"), levels = c("BR", "FR", "US")),
  tier = c(1L, 2L, 1L)
)
**修改**
根据提问者的评论以及对问题的编辑,输入数据已经发生变化;下面的代码和新的 df1 反映了这一变化。
# Fill missing values inside each group of a data frame.
#
# DF:  a data frame.
# col: name of the grouping column, looked up as `DF[[col]]`.
#
# `DF` is split by `DF[[col]]`; each piece is filled with zoo::na.locf(),
# first from the last row upwards and then from the first row downwards.
# The pieces are stacked with rbind() and the row names are reset.
# Returns the stacked data frame.
# NOTE(review): na.locf() is called with its default `na.rm`; depending on
# the installed zoo version this may drop rows that still contain NA after
# filling -- confirm before relying on the row count.
fun <- function(DF, col){
  fill_both_ways <- function(piece) {
    zoo::na.locf(zoo::na.locf(piece, fromLast = TRUE))
  }
  filled <- lapply(split(DF, DF[[col]]), fill_both_ways)
  stacked <- do.call(rbind, filled)
  row.names(stacked) <- NULL
  stacked
}
# Full outer join of the two tables on the columns they have in common.
m <- merge(x = df1, y = df2, all = TRUE)
# Carry the last observed value forward in both key columns.
m[["country"]] <- zoo::na.locf(m[["country"]])
m[["country_code"]] <- zoo::na.locf(m[["country_code"]])
# Fill the remaining gaps within each country_code group, then drop
# repeated rows.
m <- fun(m, "country_code")
m <- unique(m)
m
# country_code country company_name cost1 cost2 tier
#1 BR Brazil Test <NA> 0.0819 2
#2 FR France Test <NA> 0.09945 1
#4 US USA Test <NA> 0.1053 1
# Revised sample input `df1` (after the question was edited): three rows for
# one company; `country_code` and `country` each have one missing value and
# `cost1` is entirely NA. Row names are the character strings "1", "2", "3".
df1 <- data.frame(
  company_name = factor(c("Test", "Test", "Test"), levels = "Test"),
  country_code = factor(c("FR", "BR", NA), levels = c("BR", "FR")),
  country = factor(c(NA, "Brazil", "USA"), levels = c("Brazil", "USA")),
  cost1 = c(NA, NA, NA),
  cost2 = c(0.09945, 0.0819, 0.1053),
  row.names = c("1", "2", "3")
)