将表格与R中的另一个表格结合起来

时间:2020-07-07 23:56:26

标签: r

我有一个如下所示的df:

> df
      US  Canada Japan
food1 "1" "5"    "7"  
food2 "1" "9"    "10" 
food3 "0" "6"    "5"  

以及另外3个用作翻译的dfs:

> US
   Name   Corr. Name
1 food1    Pink Lady
2 food2 Blood Orange
3 food3   Asian Pear

> Canada
   Name   Corr. Name
1 food1   Honeycrisp
2 food2 Navel Orange
3 food3 Callery Pear

> Japan
   Name   Corr. Name
1 food1         Gala
2 food2 Navel Orange
3 food3   Bosc Pears

我想根据其他3个df将第一个df转换为以下内容:

> df
              US  Canada Japan
Pink Lady      1    NA     NA  
Honeycrisp     NA   5      NA  
Gala           NA   NA     7  
Blood Orange   1    NA     NA
Navel Orange   NA   9      10 
Asian Pear     0    NA     NA
Callery Pear   NA   6      NA 
Bosc Pears     NA   NA     5  

下面是可用于复现上述表格的代码:

# Score matrix (character values): one row per food id, one column per country.
# Values are filled column by column.
df <- matrix(
  c("1", "1", "0", "5", "9", "6", "7", "10", "5"),
  nrow = 3L,
  ncol = 3L,
  dimnames = list(
    c("food1", "food2", "food3"),
    c("US", "Canada", "Japan")
  )
)

# Lookup table for the US: food id (Name) -> corrected name (Corr..Name).
# Both columns are factors; levels are spelled out so the integer codes
# do not depend on locale-specific sorting.
US <- structure(
  list(
    Name = factor(
      c("food1", "food2", "food3"),
      levels = c("food1", "food2", "food3")
    ),
    Corr..Name = factor(
      c("Pink Lady", "Blood Orange", "Asian Pear"),
      levels = c("Asian Pear", "Blood Orange", "Pink Lady")
    )
  ),
  class = "data.frame",
  row.names = c(NA, 3L)
)

# Lookup table for Canada: food id (Name) -> corrected name (Corr..Name).
# The Corr..Name factor carries six levels, of which only the last three
# are used by the rows below.
Canada <- structure(
  list(
    Name = factor(
      c("food1", "food2", "food3"),
      levels = c("food1", "food2", "food3")
    ),
    Corr..Name = factor(
      c("Honeycrisp", "Navel Orange", "Callery Pear"),
      levels = c(
        "Asian Pear", "Blood Orange", "Pink Lady",
        "Honeycrisp", "Navel Orange", "Callery Pear"
      )
    )
  ),
  class = "data.frame",
  row.names = c(NA, 3L)
)

# Lookup table for Japan: food id (Name) -> corrected name (Corr..Name).
# The Corr..Name factor carries seven levels; the rows below use
# "Gala", "Navel Orange" and "Bosc Pears".
Japan <- structure(
  list(
    Name = factor(
      c("food1", "food2", "food3"),
      levels = c("food1", "food2", "food3")
    ),
    Corr..Name = factor(
      c("Gala", "Navel Orange", "Bosc Pears"),
      levels = c(
        "Asian Pear", "Blood Orange", "Pink Lady", "Gala",
        "Mandarin Orange", "Bosc Pears", "Navel Orange"
      )
    )
  ),
  class = "data.frame",
  row.names = c(NA, 3L)
)

补充问题(编辑): 如果列中有重复的国家该怎么办? 示例:

> df
        col1   col2  col3 col4
country   US Canada Japan   US
food1      1      5     7    3
food2      1      9    10    2
food3      0      6     5    4

用于复现的代码:

> dput(df)
# dput() representation of the follow-up data frame.
# Row "country" holds the country label of each column (US, Canada, Japan, US);
# rows food1-food3 hold the values. col1-col3 are factors, col4 is a plain
# character vector.
structure(list(col1 = structure(c(3L, 2L, 2L, 1L), .Label = c("0", 
"1", "US"), class = "factor"), col2 = structure(c(4L, 1L, 3L, 
2L), .Label = c("5", "6", "9", "Canada"), class = "factor"), 
    col3 = structure(c(4L, 3L, 1L, 2L), .Label = c("10", "5", 
    "7", "Japan"), class = "factor"), col4 = c("US", "3", "2", 
    "4")), row.names = c("country", "food1", "food2", "food3"
), class = "data.frame")

所需的输出:

> df
country        US  Canada Japan US 
Pink Lady      1    NA     NA   3
Honeycrisp     NA   5      NA   NA  
Gala           NA   NA     7    NA 
Blood Orange   1    NA     NA   2
Navel Orange   NA   9      10   NA
Asian Pear     0    NA     NA   4
Callery Pear   NA   6      NA   NA
Bosc Pears     NA   NA     5    NA

让我知道是否有任何不清楚的事情

4 个答案:

答案 0 :(得分:1)

使用data.table的选项:

library(data.table)

# Per-country lookup tables, keyed by country name
ctries <- list(US = US, Canada = Canada, Japan = Japan)

# Stack the lookups into one table; "ctry" records which list element
# each row came from
rowsbind <- rbindlist(ctries, idcol = "ctry")

# Update join against the melted score matrix
# (Var1 = food id, Var2 = country, value = score): copy the score onto
# every lookup row with a matching (food id, country) pair
rowsbind[
  setDT(reshape2::melt(df)),
  on = .(Name = Var1, ctry = Var2),
  value := i.value
]

# One row per (food id, corrected name), one score column per country,
# with the country columns in the same order as "ctries"
ans <- dcast(rowsbind, Name + Corr..Name ~ ctry, value.var = "value")
setcolorder(ans, c("Name", "Corr..Name", names(ctries)))

输出:

    Name   Corr..Name   US Canada Japan
1: food1    Pink Lady    1   <NA>  <NA>
2: food1   Honeycrisp <NA>      5  <NA>
3: food1         Gala <NA>   <NA>     7
4: food2 Blood Orange    1   <NA>  <NA>
5: food2 Navel Orange <NA>      9    10
6: food3   Asian Pear    0   <NA>  <NA>
7: food3 Callery Pear <NA>      6  <NA>
8: food3   Bosc Pears <NA>   <NA>     5

编辑: 针对问题中新增的 df(含重复国家)的解法

library(data.table)

# Per-country lookup tables, keyed by country name
ctries <- list(US = US, Canada = Canada, Japan = Japan)

# In the posted data frame the country of each column is stored in the row
# named "country", not in the column names. Read that row as a character
# vector; as.character() handles both the factor and the character columns.
col_ctry <- vapply(df["country", , drop = FALSE], as.character, character(1L))

# Keep only the food rows, store every value as character so melt() sees a
# single measure type, and use the country labels as column names.
# make.unique() turns a repeated "US" into "US.1", "US.2", ...
scores <- df[rownames(df) != "country", , drop = FALSE]
scores[] <- lapply(scores, as.character)
names(scores) <- make.unique(unname(col_ctry))

# Long form: one row per (food id, column); "ctry" is the column name with
# the de-duplication suffix stripped, so "US.1" maps back to "US"
mDT <- melt(setDT(scores, keep.rownames = TRUE), id.vars = "rn")[,
    ctry := sub("\\.[0-9]+$", "", variable)]

# Update join: attach the corrected name for each (food id, country) pair
mDT[rbindlist(ctries, idcol = "ctry"), on = .(rn = Name, ctry),
    Corr..Name := i.Corr..Name]

# One row per (food id, corrected name), one column per original column
ans <- dcast(mDT, rn + Corr..Name ~ variable, value.var = "value")

输出:

      rn   Corr..Name   US Canada Japan US.1
1: food1    Pink Lady    1   <NA>  <NA>    3
2: food1   Honeycrisp <NA>      5  <NA> <NA>
3: food1         Gala <NA>   <NA>     7 <NA>
4: food2 Blood Orange    1   <NA>  <NA>    2
5: food2 Navel Orange <NA>      9    10 <NA>
6: food3   Asian Pear    0   <NA>  <NA>    4
7: food3 Callery Pear <NA>      6  <NA> <NA>
8: food3   Bosc Pears <NA>   <NA>     5 <NA>

答案 1 :(得分:0)

这是一个使用 tidyverse 的解决方案:

library(tidyverse)

# Long form of the score matrix: one row per (food id, country) pair.
# The matrix row names become the "Name" column.
df_long <- pivot_longer(
  as_tibble(df, rownames = "Name"),
  cols = -Name,
  names_to = "country",
  values_to = "score"
)

# Stack the lookup tables (tagged with their country), attach the scores,
# then spread the countries back out into columns. "Name" is only needed
# for ordering and is dropped at the end.
bind_rows(list(US = US, Canada = Canada, Japan = Japan), .id = "country") %>%
  left_join(df_long, by = c("country", "Name")) %>%
  arrange(Name) %>%
  pivot_wider(names_from = country, values_from = score) %>%
  select(-Name)
#> # A tibble: 8 x 4
#>   Corr..Name   US    Canada Japan
#>   <fct>        <chr> <chr>  <chr>
#> 1 Pink Lady    1     <NA>   <NA> 
#> 2 Honeycrisp   <NA>  5      <NA> 
#> 3 Gala         <NA>  <NA>   7    
#> 4 Blood Orange 1     <NA>   <NA> 
#> 5 Navel Orange <NA>  9      10   
#> 6 Asian Pear   0     <NA>   <NA> 
#> 7 Callery Pear <NA>  6      <NA> 
#> 8 Bosc Pears   <NA>  <NA>   5

由 reprex 包(v0.3.0)创建于 2020-07-07

答案 2 :(得分:0)

这是仅使用基础 R(base R)的另一种方法。

# Collect the per-country lookup tables in a list named by country
x <- list(US = US, Canada = Canada, Japan = Japan)

# Build one two-column data frame (Corr..Name, <country>) per country
x <- lapply(names(x), function(country) {
  lookup <- x[[country]]
  # Find each food id among the row names of "df" instead of assuming the
  # lookup table and "df" share the same row order; a food id that is
  # missing from "df" yields NA rather than a silently misaligned count
  rows <- match(lookup$Name, rownames(df))
  out <- data.frame(
    lookup$Corr..Name,
    unname(df[rows, country]),
    stringsAsFactors = FALSE
  )
  # rename the columns
  colnames(out) <- c("Corr..Name", country)
  out
})

# Convert the list of data frames into one data frame, merging by
# "Corr..Name"; all = TRUE keeps names that occur in only some countries
x <- Reduce(function(df1, df2) merge(df1, df2, by = "Corr..Name", all = TRUE), x)

x # Result
#  Corr..Name   US Canada Japan
# 1   Asian Pear    0   <NA>  <NA>
# 2 Blood Orange    1   <NA>  <NA>
# 3    Pink Lady    1   <NA>  <NA>
# 4   Honeycrisp <NA>      5  <NA>
# 5 Navel Orange <NA>      9    10
# 6 Callery Pear <NA>      6  <NA>
# 7         Gala <NA>   <NA>     7
# 8   Bosc Pears <NA>   <NA>     5

答案 3 :(得分:0)

一个简单的、仅使用基础 R(base R)的解决方案:

### combine the 3 country dataframes into one dataframe, to make lookup easier

# add column to code for country
US$country     <- "US"
Canada$country <- "Canada"
Japan$country  <- "Japan"

# combine using rbind()
df2 <- rbind(US, Canada, Japan); str(df2)


### create desired dataframe

# unique corrected names, in order of first appearance
# (if order is important, define how to handle duplicates)
df3 <- data.frame(Corr..Name = unique(df2$Corr..Name))

# lookup, vectorised over all corrected names and repeated per country:
#   a) match(): row of each corrected name in that country's table (NA if absent)
#   b) translate that row to its food id and find the id among the row names
#      of "df", so the result does not depend on the tables sharing a row order
#   c) index "df"; NA positions yield NA scores
lookups <- list(US = US, Canada = Canada, Japan = Japan)
for (ctry in names(lookups)) {
  tbl <- lookups[[ctry]]
  food <- tbl$Name[match(df3$Corr..Name, tbl$Corr..Name)]
  df3[[ctry]] <- as.numeric(df[match(food, rownames(df)), ctry])
}

# show result
df3

结果:

    Corr..Name US Canada Japan
1    Pink Lady  1     NA    NA
2 Blood Orange  1     NA    NA
3   Asian Pear  0     NA    NA
4   Honeycrisp NA      5    NA
5 Navel Orange NA      9    10
6 Callery Pear NA      6    NA
7         Gala NA     NA     7
8   Bosc Pears NA     NA     5