根据3列更改值

时间:2019-10-24 09:28:17

标签: r dataframe

我有以下数据框子集

Initial Date        Type    Sub_type
AML 2018-01-02  DV  MR  
AML 2018-01-02  DV  MR_abdo     
DJ  2018-01-02  DV  MR      
DJ  2018-01-02  DV  MR_abdo     
MS  2018-01-02  V2  V2      
MS  2018-01-02  DV  UL      
NK  2018-01-02  DV  Pet_ct      
NK  2018-01-02  DV  CT_dr   
NK  2018-01-03  DV  CT_dr       
NK  2018-01-03  DV  Pet_ct      
PV  2018-01-03  V2  V2      
PV  2018-01-03  DV  UL      
MD  2018-01-04  V2  V2      
MD  2018-01-04  DV  MR      
NQ  2018-01-04  AN_BV   V1      
NQ  2018-01-04  DV  CT_dr       
PS  2018-01-04  DV  Møder       
PS  2018-01-04  DV  Ferie

如果同一个人(Initial)在同一天(Date)有任意一行的Sub_type是V2,我想把此人当天所有行的Type都改为V2。

例如,以MS为例:在2018年1月2日,此人有两行,Type分别为V2和DV,Sub_type分别为V2和UL。由于此人在同一日期有一行的Sub_type为V2,所以我希望把另一行的Type值从DV更改为V2。

希望输出

Initial Date        Type    Sub_type
AML 2018-01-02  DV  MR  
AML 2018-01-02  DV  MR_abdo     
DJ  2018-01-02  DV  MR      
DJ  2018-01-02  DV  MR_abdo     
MS  2018-01-02  V2  V2      
MS  2018-01-02  V2  UL      
NK  2018-01-02  DV  Pet_ct      
NK  2018-01-02  DV  CT_dr   
NK  2018-01-03  DV  CT_dr       
NK  2018-01-03  DV  Pet_ct      
PV  2018-01-03  V2  V2      
PV  2018-01-03  V2  UL      
MD  2018-01-04  V2  V2      
MD  2018-01-04  V2  MR      
NQ  2018-01-04  AN_BV   V1      
NQ  2018-01-04  DV  CT_dr       
PS  2018-01-04  DV  Møder       
PS  2018-01-04  DV  Ferie

以及 dput 输出

# dput() output of the sample data frame: four character columns and the
# non-sequential integer row names listed below.
structure(
  list(
    Initial = c(
      "AML", "AML", "DJ", "DJ", "MS", "MS", "NK", "NK", "NK", "NK",
      "PV", "PV", "MD", "MD", "NQ", "NQ", "PS", "PS"
    ),
    Date = c(
      "2018-01-02", "2018-01-02", "2018-01-02", "2018-01-02",
      "2018-01-02", "2018-01-02", "2018-01-02", "2018-01-02",
      "2018-01-03", "2018-01-03", "2018-01-03", "2018-01-03",
      "2018-01-04", "2018-01-04", "2018-01-04", "2018-01-04",
      "2018-01-04", "2018-01-04"
    ),
    Type = c(
      "DV", "DV", "DV", "DV", "V2", "DV", "DV", "DV", "DV", "DV",
      "V2", "DV", "V2", "DV", "AN_BV", "DV", "DV", "DV"
    ),
    Sub_type = c(
      "MR", "MR_abdo", "MR", "MR_abdo", "V2", "UL", "Pet_ct", "CT_dr",
      "CT_dr", "Pet_ct", "V2", "UL", "V2", "MR", "V1", "CT_dr",
      "Møder", "Ferie"
    )
  ),
  row.names = c(
    470L, 585L, 1605L, 1796L, 6081L, 6230L, 6673L, 6710L, 6514L,
    6586L, 7490L, 7658L, 5512L, 5657L, 6968L, 7142L, 7182L, 7296L
  ),
  class = "data.frame"
)

2 个答案:

答案 0 :(得分:2)

对于由Initial和Date构成的每个组,我们检查组内是否存在Type == Sub_type的行;如果存在,就把该行的Type赋给组内的所有行,否则保持Type不变

library(dplyr)

# For every (Initial, Date) group: if some row has Type equal to Sub_type
# (e.g. the "V2"/"V2" row), copy that row's Type onto all rows of the group;
# otherwise leave Type untouched. The result is returned ungrouped.
df %>%
  group_by(Initial, Date) %>%
  mutate(
    # which() drops NA comparisons, so a group containing missing values can
    # never turn the `if` condition into NA (which would raise an error).
    Type = if (length(which(Type == Sub_type)) > 0) {
      Type[which(Type == Sub_type)[1]]
    } else {
      Type
    }
  ) %>%
  # Drop the grouping so later verbs do not silently operate per group.
  ungroup()

#   Initial Date       Type  Sub_type
#   <chr>   <chr>      <chr> <chr>   
# 1 AML     2018-01-02 DV    MR      
# 2 AML     2018-01-02 DV    MR_abdo 
# 3 DJ      2018-01-02 DV    MR      
# 4 DJ      2018-01-02 DV    MR_abdo 
# 5 MS      2018-01-02 V2    V2      
# 6 MS      2018-01-02 V2    UL      
# 7 NK      2018-01-02 DV    Pet_ct  
# 8 NK      2018-01-02 DV    CT_dr   
# 9 NK      2018-01-03 DV    CT_dr   
#10 NK      2018-01-03 DV    Pet_ct  
#11 PV      2018-01-03 V2    V2      
#12 PV      2018-01-03 V2    UL      
#13 MD      2018-01-04 V2    V2      
#14 MD      2018-01-04 V2    MR      
#15 NQ      2018-01-04 AN_BV V1      
#16 NQ      2018-01-04 DV    CT_dr   
#17 PS      2018-01-04 DV    Møder   
#18 PS      2018-01-04 DV    Ferie   

数据

# Sample data for the dplyr answer: 18 rows, four character columns,
# default (automatic) row names.
df <- data.frame(
  Initial = rep(
    c("AML", "DJ", "MS", "NK", "PV", "MD", "NQ", "PS"),
    times = c(2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L)
  ),
  Date = rep(
    c("2018-01-02", "2018-01-03", "2018-01-04"),
    times = c(8L, 4L, 6L)
  ),
  Type = c(
    "DV", "DV", "DV", "DV", "V2", "DV", "DV", "DV", "DV", "DV",
    "V2", "DV", "V2", "DV", "AN_BV", "DV", "DV", "DV"
  ),
  Sub_type = c(
    "MR", "MR_abdo", "MR", "MR_abdo", "V2", "UL", "Pet_ct", "CT_dr",
    "CT_dr", "Pet_ct", "V2", "UL", "V2", "MR", "V1", "CT_dr",
    "Møder", "Ferie"
  ),
  # Keep the columns as character vectors on every R version.
  stringsAsFactors = FALSE
)

答案 1 :(得分:1)

一个 base R 解决方案。先找出Sub_type=="V2"的行所对应的Initial和Date组合,然后把所有与这些组合匹配的行的Type直接覆盖为"V2"。

# Overwrite Type with "V2" for every row whose (Initial, Date) combination
# also occurs on a row with Sub_type == "V2".

# One key per row; drop = TRUE avoids building unused level combinations.
grp <- interaction(x$Initial, x$Date, drop = TRUE)

# which() ignores NA comparisons, so missing Sub_type values are never
# treated as a "V2" row.
hit <- grp %in% grp[which(x$Sub_type == "V2")]

# When Type is a factor without a "V2" level, add the level first; otherwise
# the assignment below would produce NA with a warning.
if (is.factor(x$Type) && any(hit) && !("V2" %in% levels(x$Type))) {
  levels(x$Type) <- c(levels(x$Type), "V2")
}

x$Type[hit] <- "V2"
x
#   Initial       Date  Type Sub_type
#1      AML 2018-01-02    DV       MR
#2      AML 2018-01-02    DV  MR_abdo
#3       DJ 2018-01-02    DV       MR
#4       DJ 2018-01-02    DV  MR_abdo
#5       MS 2018-01-02    V2       V2
#6       MS 2018-01-02    V2       UL
#7       NK 2018-01-02    DV   Pet_ct
#8       NK 2018-01-02    DV    CT_dr
#9       NK 2018-01-03    DV    CT_dr
#10      NK 2018-01-03    DV   Pet_ct
#11      PV 2018-01-03    V2       V2
#12      PV 2018-01-03    V2       UL
#13      MD 2018-01-04    V2       V2
#14      MD 2018-01-04    V2       MR
#15      NQ 2018-01-04 AN_BV       V1
#16      NQ 2018-01-04    DV    CT_dr
#17      PS 2018-01-04    DV    Møder
#18      PS 2018-01-04    DV    Ferie

数据:

# Sample data for the base R answer: 18 rows, four factor columns with
# explicitly ordered levels, default (automatic) row names.
x <- data.frame(
  Initial = factor(
    rep(
      c("AML", "DJ", "MS", "NK", "PV", "MD", "NQ", "PS"),
      times = c(2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L)
    ),
    levels = c("AML", "DJ", "MD", "MS", "NK", "NQ", "PS", "PV")
  ),
  Date = factor(
    rep(
      c("2018-01-02", "2018-01-03", "2018-01-04"),
      times = c(8L, 4L, 6L)
    ),
    levels = c("2018-01-02", "2018-01-03", "2018-01-04")
  ),
  Type = factor(
    c(
      "DV", "DV", "DV", "DV", "V2", "DV", "DV", "DV", "DV", "DV",
      "V2", "DV", "V2", "DV", "AN_BV", "DV", "DV", "DV"
    ),
    levels = c("AN_BV", "DV", "V2")
  ),
  Sub_type = factor(
    c(
      "MR", "MR_abdo", "MR", "MR_abdo", "V2", "UL", "Pet_ct", "CT_dr",
      "CT_dr", "Pet_ct", "V2", "UL", "V2", "MR", "V1", "CT_dr",
      "Møder", "Ferie"
    ),
    levels = c(
      "CT_dr", "Ferie", "Møder", "MR", "MR_abdo", "Pet_ct", "UL", "V1",
      "V2"
    )
  )
)