Question

# Example data ---------------------------------------------------------------
# df1: one row per action taken against an account (unique_ref), tagged with
# the date band the action falls into.
actual_date <- c(
  "2018-01-03", "2018-01-02", "2018-01-25", "2018-01-15", "2018-01-06",
  "2018-01-02", "2018-01-16", "2018-01-22", "2018-01-03"
)
date_band <- c(
  "_201801", "_201801", "_201803", "_201802", "_201801",
  "_201801", "_201803", "_201804", "_201801"
)
action <- c(
  "text", "letter", "call", "letter", "visit", "letter", "text", "text", "call"
)
unique_ref <- c(1, 1, 2, 1, 2, 3, 3, 4, 4)

# Build the frame with data.frame() instead of as.data.frame(cbind(...)):
# cbind() creates a character matrix first, which silently turns the numeric
# key unique_ref into text (or a factor on R < 4.0).
df1 <- data.frame(
  unique_ref, actual_date, date_band, action,
  stringsAsFactors = FALSE
)

# df2: one row per account with a priority/balance pair for every date band.
# The priority and balance values are kept as strings, as originally supplied.
unique_ref <- c(1, 2, 3, 4)
priority_201801 <- c("3", "2", "3", "0")
balance_201801 <- c("30", "-20", "35", "-100")
priority_201802 <- c("1", "1", "1", "2")
balance_201802 <- c("60", "-40", "35", "0")
priority_201803 <- c("2", "2", "3", "2")
balance_201803 <- c("30", "-40", "-50", "100")
priority_201804 <- c("99", "0", "0", "0")
balance_201804 <- c("0", "-20", "-50", "-100")

df2 <- data.frame(
  unique_ref,
  priority_201801, balance_201801,
  priority_201802, balance_201802,
  priority_201803, balance_201803,
  priority_201804, balance_201804,
  stringsAsFactors = FALSE
)

上面的代码产生了我正在使用的两个数据集示例。

df1看起来像这样：

  unique_ref actual_date date_band action
1          1  2018-01-03   _201801   text
2          1  2018-01-02   _201801 letter
3          2  2018-01-25   _201803   call
4          1  2018-01-15   _201802 letter
5          2  2018-01-06   _201801  visit
6          3  2018-01-02   _201801 letter
7          3  2018-01-16   _201803   text
8          4  2018-01-22   _201804   text
9          4  2018-01-03   _201801   call

df2如下：

  unique_ref priority_201801 balance_201801 priority_201802 balance_201802 priority_201803 balance_201803 priority_201804 balance_201804
1          1               3             30               1             60               2             30              99              0
2          2               2            -20               1            -40               2            -40               0            -20
3          3               3             35               1             35               3            -50               0            -50
4          4               0           -100               2              0               2            100               0           -100

我想做的是在df2中为每个date_band添加一列action_dateband（即action_201801、action_201802等）。这些列的值取自df1中对应date_band的action，并按unique_ref进行匹配。

所需的输出如下；如果同一周（即同一个date_band）内有两个动作，则这两个动作之间用逗号分隔。

  unique_ref priority_201801 balance_201801 action_201801 priority_201802 balance_201802 action_201802 priority_201803 balance_201803 action_201803
1          1               3             30   text,letter               1             60        letter               2             30              
2          2               2            -20         visit               1            -40                             2            -40          call
3          3               3             35        letter               1             35                             3            -50          text
4          4               0           -100          call               2              0                             2            100              
  priority_201804 balance_201804 action_201804
1              99              0              
2               0            -20              
3               0            -50              
4               0           -100          text

Answer 1

library(tidyverse)

# Attach every action in df1 to its account in df2, then turn each date_band
# into its own action_<date_band> column.
# NOTE: this variant assumes at most one action per (unique_ref, date_band).
# If a combination occurs more than once, pivot_wider() warns and produces
# list-columns; collapse the duplicates first (e.g. summarise() + paste()).
df2 %>%
  left_join(df1, by = "unique_ref") %>%                 # join df1 to df2
  select(-actual_date) %>%                              # drop unneeded column
  mutate(date_band = paste0("action", date_band)) %>%   # build new column names
  pivot_wider(                                          # reshape long -> wide
    names_from = date_band,
    values_from = action,
    names_sort = TRUE                                   # keep columns sorted
  )


#   unique_ref priority_201801 balance_201801 priority_201802 balance_201802 priority_201803 balance_201803
# 1          1               3             30               1             60               2             30
# 2          2               2            -20               1            -40               2            -40
# 3          3               3             35               1             35               3            -50
# 4          4               0           -100               2              0               2            100
#     priority_201804 balance_201804 action_201801 action_201802 action_201803 action_201804
#   1              99              0          text        letter          <NA>          <NA>
#   2               0            -20         visit          <NA>          call          <NA>
#   3               0            -50        letter          <NA>          text          <NA>
#   4               0           -100          call          <NA>          <NA>          text

对于特定的ref和date_band有多个操作的情况，可以使用以下方法：

library(tidyverse)

# Collapse df1 to one row per (unique_ref, date_band), so that several actions
# in the same band become a single comma-separated string.
df1_upd <- df1 %>%
  group_by(unique_ref, date_band) %>%                     # per ref / date_band
  summarise(action = paste0(action, collapse = ",")) %>%  # combine actions
  ungroup()                                               # forget the grouping

# Join the collapsed actions onto df2 and spread each date_band into its own
# action_<date_band> column. Combinations with no action end up as NA.
df2 %>%
  left_join(df1_upd, by = "unique_ref") %>%
  mutate(date_band = paste0("action", date_band)) %>%
  pivot_wider(
    names_from = date_band,
    values_from = action,
    names_sort = TRUE   # same sorted column order spread() produced
  )

#   unique_ref priority_201801 balance_201801 priority_201802 balance_201802 priority_201803 balance_201803
# 1          1               3             30               1             60               2             30
# 2          2               2            -20               1            -40               2            -40
# 3          3               3             35               1             35               3            -50
# 4          4               0           -100               2              0               2            100
#     priority_201804 balance_201804 action_201801 action_201802 action_201803 action_201804
#   1              99              0   text,letter        letter          <NA>          <NA>
#   2               0            -20         visit          <NA>          call          <NA>
#   3               0            -50        letter          <NA>          text          <NA>
#   4               0           -100          call          <NA>          <NA>          text

通过在R中合并将行更改为列

1 个答案: