分配相反的标签

时间:2017-08-08 01:29:02

标签: dplyr

我的数据如下所示:

# data.table() comes from the data.table package; attach it so the snippet
# runs in a fresh session.
library(data.table)

# Each row describes how customerNames.1 is related to customerNames.2.
customerNames.1 <- c("A", "B", "A", "C", "E", "F")
customerNames.2 <- c("B", "A", "C", "A", "F", "E")
# Note: "NA" below is the literal two-character string, not a missing value.
Relation <- c("Father", "NA", "Spouse", "NA", "Mother", "NA")
# Column names are taken from the variable names.
Datasample <- data.table(customerNames.1, customerNames.2, Relation)

# data looks like 

   customerNames.1 customerNames.2 Relation
1:               A               B   Father
2:               B               A       NA
3:               A               C   Spouse
4:               C               A       NA
5:               E               F   Mother
6:               F               E       NA

每一行表示 customerNames.1 与 customerNames.2 之间的关系。源数据中只记录了 A 到 B 的关系,但我还想补全 B 到 A 的关系(它可以由前者推导出来,例如:父亲 → 孩子,配偶 → 配偶,母亲 → 孩子)。

我如何获得如下内容:

   customerNames.1 customerNames.2 Relation
1:               A               B   Father
2:               B               A    Child
3:               A               C   Spouse
4:               C               A   Spouse
5:               E               F   Mother
6:               F               E    Child

1 个答案:

答案 0 :(得分:1)

使用 tidyverse,您可以按类似下面的方式计算缺失的关系。这可能不是最简单的办法,但它能得到预期的结果。

思路是:先取出源表(关系已知的行),据此计算出反向关系,再把这些缺失的关系连接回原表,从而补全整张表。

library(dplyr)

# Sample data: each row describes how customerNames.1 is related to
# customerNames.2. Relations that still have to be derived are real missing
# values (NA_character_), not the string "NA".
customerNames.1 <- c("A", "B", "A", "C", "E", "F")
customerNames.2 <- c("B", "A", "C", "A", "F", "E")
Relation <- c(
  "Father", NA_character_, "Spouse", NA_character_, "Mother", NA_character_
)
# tibble() replaces the deprecated data_frame(); column names are taken from
# the variable names.
Datasample <- tibble(customerNames.1, customerNames.2, Relation)
Datasample
#> # A tibble: 6 x 3
#>   customerNames.1 customerNames.2 Relation
#>             <chr>           <chr>    <chr>
#> 1               A               B   Father
#> 2               B               A     <NA>
#> 3               A               C   Spouse
#> 4               C               A     <NA>
#> 5               E               F   Mother
#> 6               F               E     <NA>

# Keep only the rows without any missing value, i.e. the pairs whose relation
# is already recorded.
sourceTab <- Datasample %>%
  tidyr::drop_na()
sourceTab
#> # A tibble: 3 x 3
#>   customerNames.1 customerNames.2 Relation
#>             <chr>           <chr>    <chr>
#> 1               A               B   Father
#> 2               A               C   Spouse
#> 3               E               F   Mother
# Derive the inverse relations: swap the two customer columns and translate
# each relation to the one seen from the other side of the pair.
computeTab <- sourceTab %>%
  select(
    customerNames.1 = customerNames.2,
    customerNames.2 = customerNames.1,
    Relation
  ) %>%
  mutate(
    # Named lookup: a parent's counterpart is "Child", a spouse's is "Spouse";
    # any relation not listed yields NA.
    Relation = unname(
      c(Father = "Child", Mother = "Child", Spouse = "Spouse")[Relation]
    )
  )
computeTab
#> # A tibble: 3 x 3
#>   customerNames.1 customerNames.2 Relation
#>             <chr>           <chr>    <chr>
#> 1               B               A    Child
#> 2               C               A   Spouse
#> 3               F               E    Child
# Attach the derived inverse relations to the full table, matching on the
# customer pair. The join yields Relation.x (recorded) and Relation.y
# (derived); coalesce() keeps the recorded value and falls back to the derived
# one, leaving NA only when both are missing.
completedTab <- Datasample %>%
  left_join(computeTab, by = c("customerNames.1", "customerNames.2")) %>%
  mutate(Relation = coalesce(Relation.x, Relation.y)) %>%
  select(starts_with("customer"), Relation)
completedTab
#> # A tibble: 6 x 3
#>   customerNames.1 customerNames.2 Relation
#>             <chr>           <chr>    <chr>
#> 1               A               B   Father
#> 2               B               A    Child
#> 3               A               C   Spouse
#> 4               C               A   Spouse
#> 5               E               F   Mother
#> 6               F               E    Child
# completedTab already contains every source row, so stacking sourceTab on top
# of it repeats those rows; distinct() drops the repeats and leaves one row
# per customer pair (source rows first, derived rows after).
bind_rows(sourceTab, completedTab) %>%
  distinct()
#> # A tibble: 6 x 3
#>   customerNames.1 customerNames.2 Relation
#>             <chr>           <chr>    <chr>
#> 1               A               B   Father
#> 2               A               C   Spouse
#> 3               E               F   Mother
#> 4               B               A    Child
#> 5               C               A   Spouse
#> 6               F               E    Child