我有一个如下所示的df:
> df
US Canada Japan
food1 "1" "5" "7"
food2 "1" "9" "10"
food3 "0" "6" "5"
以及另外3个用作翻译的dfs:
> US
Name Corr. Name
1 food1 Pink Lady
2 food2 Blood Orange
3 food3 Asian Pear
> Canada
Name Corr. Name
1 food1 Honeycrisp
2 food2 Navel Orange
3 food3 Callery Pear
> Japan
Name Corr. Name
1 food1 Gala
2 food2 Navel Orange
3 food3 Bosc Pears
我想根据其他3个df将第一个df转换为以下内容:
> df
US Canada Japan
Pink Lady 1 NA NA
Honeycrisp NA 5 NA
Gala NA NA 7
Blood Orange 1 NA NA
Navel Orange NA 9 10
Asian Pear 0 NA NA
Callery Pear NA 6 NA
Bosc Pears NA NA 5
df <- structure(c("1", "1", "0", "5", "9", "6", "7", "10", "5"), .Dim = c(3L,
3L), .Dimnames = list(c("food1", "food2", "food3"), c("US", "Canada",
"Japan")))
US <- structure(list(Name = structure(1:3, .Label = c("food1", "food2",
"food3"), class = "factor"), Corr..Name = structure(3:1, .Label = c("Asian Pear",
"Blood Orange", "Pink Lady"), class = "factor")), class = "data.frame", row.names = c(NA,
3L))
Canada <- structure(list(Name = structure(1:3, .Label = c("food1", "food2",
"food3"), class = "factor"), Corr..Name = structure(4:6, .Label = c("Asian Pear",
"Blood Orange", "Pink Lady", "Honeycrisp", "Navel Orange", "Callery Pear"
), class = "factor")), class = "data.frame", row.names = c(NA,
3L))
Japan <- structure(list(Name = structure(1:3, .Label = c("food1", "food2",
"food3"), class = "factor"), Corr..Name = structure(c(4L, 7L,
6L), .Label = c("Asian Pear", "Blood Orange", "Pink Lady", "Gala",
"Mandarin Orange", "Bosc Pears", "Navel Orange"), class = "factor")), class = "data.frame", row.names = c(NA,
3L))
编辑更多问题: 如果我有重复的国家怎么办? 示例:
> df
col1 col2 col3 col4
country US Canada Japan US
food1 1 5 7 3
food2 1 9 10 2
food3 0 6 5 4
复制代码
> dput(df)
structure(list(col1 = structure(c(3L, 2L, 2L, 1L), .Label = c("0",
"1", "US"), class = "factor"), col2 = structure(c(4L, 1L, 3L,
2L), .Label = c("5", "6", "9", "Canada"), class = "factor"),
col3 = structure(c(4L, 3L, 1L, 2L), .Label = c("10", "5",
"7", "Japan"), class = "factor"), col4 = c("US", "3", "2",
"4")), row.names = c("country", "food1", "food2", "food3"
), class = "data.frame")
所需的输出:
> df
country US Canada Japan US
Pink Lady 1 NA NA 3
Honeycrisp NA 5 NA NA
Gala NA NA 7 NA
Blood Orange 1 NA NA 2
Navel Orange NA 9 10 NA
Asian Pear 0 NA NA 4
Callery Pear NA 6 NA NA
Bosc Pears NA NA 5 NA
让我知道是否有任何不清楚的事情
答案 0 :(得分:1)
使用data.table
的选项:
library(data.table)
ctries <- list(US=US, Canada=Canada, Japan=Japan)
rowsbind <- rbindlist(ctries, idcol="ctry")[
setDT(reshape2::melt(df)), on=.(Name=Var1, ctry=Var2), value := value]
ans <- dcast(rowsbind, Name + Corr..Name ~ ctry, value.var="value")
setcolorder(ans, c("Name", "Corr..Name", names(ctries)))
输出:
Name Corr..Name US Canada Japan
1: food1 Pink Lady 1 <NA> <NA>
2: food1 Honeycrisp <NA> 5 <NA>
3: food1 Gala <NA> <NA> 7
4: food2 Blood Orange 1 <NA> <NA>
5: food2 Navel Orange <NA> 9 10
6: food3 Asian Pear 0 <NA> <NA>
7: food3 Callery Pear <NA> 6 <NA>
8: food3 Bosc Pears <NA> <NA> 5
编辑新的df
:
library(data.table)
ctries <- list(US=US, Canada=Canada, Japan=Japan)
mDT <- melt(setDT(df[-1L], keep.rownames=TRUE), id.vars="rn")[,
ctry := gsub("\\.(.*)", "", variable)]
mDT[rbindlist(ctries, idcol="ctry"), on=.(rn=Name, ctry), Corr..Name := Corr..Name]
ans <- dcast(mDT, rn + Corr..Name ~ variable, value.var="value")
输出:
rn Corr..Name US Canada Japan US.1
1: food1 Pink Lady 1 <NA> <NA> 3
2: food1 Honeycrisp <NA> 5 <NA> <NA>
3: food1 Gala <NA> <NA> 7 <NA>
4: food2 Blood Orange 1 <NA> <NA> 2
5: food2 Navel Orange <NA> 9 10 <NA>
6: food3 Asian Pear 0 <NA> <NA> 4
7: food3 Callery Pear <NA> 6 <NA> <NA>
8: food3 Bosc Pears <NA> <NA> 5 <NA>
答案 1 :(得分:0)
这是一个整洁的解决方案:
library(tidyverse)
df_long <- df %>%
as_tibble(rownames = "Name") %>%
pivot_longer(-Name, names_to = "country", values_to = "score")
list(US = US, Canada = Canada, Japan = Japan) %>%
bind_rows(.id = "country") %>%
left_join(df_long, by = c("country", "Name")) %>%
arrange(Name) %>%
pivot_wider(names_from = country, values_from = score) %>%
select(-Name)
#> # A tibble: 8 x 4
#> Corr..Name US Canada Japan
#> <fct> <chr> <chr> <chr>
#> 1 Pink Lady 1 <NA> <NA>
#> 2 Honeycrisp <NA> 5 <NA>
#> 3 Gala <NA> <NA> 7
#> 4 Blood Orange 1 <NA> <NA>
#> 5 Navel Orange <NA> 9 10
#> 6 Asian Pear 0 <NA> <NA>
#> 7 Callery Pear <NA> 6 <NA>
#> 8 Bosc Pears <NA> <NA> 5
由reprex package(v0.3.0)于2020-07-07创建
答案 2 :(得分:0)
这是仅使用基数R的另一种方法。
# Make a list of dataframes for each country and name the elements respectively
x <- list(US, Canada, Japan)
names(x) <- c("US", "Canada", "Japan")
# Merge "df" to each dataframe independently using lapply
x <- lapply(1:length(x), function(i){ # length of x is 3
# which country?
country <- names(x)[i]
# merge Corr.Name column to respective food counts - assumes row order is the same
df2 <- cbind(x[[i]]$Corr..Name, data.frame(df[,country]))
# rename the column names
colnames(df2) <- c("Corr..Name", country)
df2
})
# Convert list of dataframes to one dataframes, merging by "Corr.Name"
x <- Reduce(function(df1, df2) merge(df1, df2, by = "Corr..Name", all = TRUE), x)
x # Result
# Corr..Name US Canada Japan
# 1 Asian Pear 0 <NA> <NA>
# 2 Blood Orange 1 <NA> <NA>
# 3 Pink Lady 1 <NA> <NA>
# 4 Honeycrisp <NA> 5 <NA>
# 5 Navel Orange <NA> 9 10
# 6 Callery Pear <NA> 6 <NA>
# 7 Gala <NA> <NA> 7
# 8 Bosc Pears <NA> <NA> 5
答案 3 :(得分:0)
简单而基本的R解决方案:
### combine the 3 country dataframes into one dataframe, to make lookup easier
# add column to code for country
US$country <- "US"
Canada$country <- "Canada"
Japan$country <- "Japan"
# combine usinmg rbind()
df2 <- rbind(US, Canada, Japan); str(df2)
### create desired dataframe
# unique apple types (If order is important, define how to handle duplicates)
df3 <- data.frame(Corr..Name = unique(df2$Corr..Name))
# lookup:
# a) match: matches the apple type to the line in the initial country df
# b) lookup in "df" using "[" operator
df3$US <- as.numeric(sapply(X = df3$Corr..Name, FUN = function(x) df[match(x = x, table = US$Corr..Name), "US"]))
df3$Canada <- as.numeric(sapply(X = df3$Corr..Name, FUN = function(x) df[match(x = x, table = Canada$Corr..Name), "Canada"]))
df3$Japan <- as.numeric(sapply(X = df3$Corr..Name, FUN = function(x) df[match(x = x, table = Japan$Corr..Name), "Japan"]))
# show result
df3
结果:
Corr..Name US Canada Japan
1 Pink Lady 1 NA NA
2 Blood Orange 1 NA NA
3 Asian Pear 0 NA NA
4 Honeycrisp NA 5 NA
5 Navel Orange NA 9 10
6 Callery Pear NA 6 NA
7 Gala NA NA 7
8 Bosc Pears NA NA 5