合并数据框并根据条件创建列

时间:2018-11-27 14:51:30

标签: r dplyr

我有 2 个数据框

数据框A:

Time Reading
1      20 
2      23
3      25
4      22
5      24
6      23 
7      24
8      23
9      23
10     22

数据框B:

TimeStart TimeEnd Alarm
 2            5     556
 7            9     556

我想创建以下联接的数据框:

Time Reading Alarmtime Alarm alarmno 
 1    20        n/a     n/a    n/a
 2    23         2      556     1
 3    25                556     1
 4    22                556     1 
 5    24         5      556     1
 6    23         n/a    n/a     n/a
 7    24         7      556     2
 8    23                556     2
 9    23         9      556     2
 10   22         n/a    n/a     n/a

我可以很容易地完成联接,但难点在于如何把警报信息向下填充到后续各行,直到该警报结束为止。同时还需要为每一次警报单独编号,即使警报代码相同,也要分别计数。如果有任何关于实现方法的想法,将不胜感激

谢谢

1 个答案:

答案 0 :(得分:0)

library(sqldf)

# Give every row of df_b its own sequence number so that each alarm occurrence
# can be told apart even when the Alarm code repeats.
df_b$AlarmNo <- seq_len(nrow(df_b))

# Range join written in SQL: each reading in df_a is paired with the df_b row
# whose TimeStart..TimeEnd window contains its Time (both ends included), and
# the left join keeps readings that fall outside every window, leaving their
# alarm columns empty. AlarmTime repeats Time only on the first and last row
# of a window and is NULL for the rows in between.
sqldf('
select  a.Time
        , a.Reading
        , case  when a.Time in (b.TimeStart, b.TimeEnd)
                  then a.Time
                else NULL
        end as AlarmTime
        , b.Alarm
        , b.AlarmNo
from    df_a a
        left join df_b b
          on a.Time between b.TimeStart and b.TimeEnd
')

#    Time Reading AlarmTime Alarm AlarmNo
# 1     1      20        NA    NA      NA
# 2     2      23         2   556       1
# 3     3      25        NA   556       1
# 4     4      22        NA   556       1
# 5     5      24         5   556       1
# 6     6      23        NA    NA      NA
# 7     7      24         7   556       2
# 8     8      23        NA   556       2
# 9     9      23         9   556       2
# 10   10      22        NA    NA      NA

library(data.table)
# Make sure df_b is a data.table so the grouped `[` syntax below is available.
setDT(df_b)

# Expand every alarm window into one row per time point it covers.
# Grouping by TimeStart lets .GRP (data.table's group counter) act as a running
# alarm number, so two alarms with the same code are still numbered separately.
# NOTE(review): this relies on each alarm having a distinct TimeStart; rows
# sharing a TimeStart would fall into one group and seq() would be handed
# several TimeEnd values -- confirm against the real data.
df_c <-
  df_b[, .(Time = seq(TimeStart, TimeEnd), Alarm, AlarmNo = .GRP)
       , by = TimeStart]

# Left join: all.x = TRUE keeps every reading and leaves the alarm columns NA
# outside an alarm window. Spelled TRUE rather than T because T is an ordinary
# variable that can be reassigned. The result carries TimeStart (the grouping
# column) rather than a separate AlarmTime column.
merge(df_a, df_c, by = 'Time', all.x = TRUE)

#     Time Reading TimeStart Alarm AlarmNo
#  1:    1      20        NA    NA      NA
#  2:    2      23         2   556       1
#  3:    3      25         2   556       1
#  4:    4      22         2   556       1
#  5:    5      24         2   556       1
#  6:    6      23        NA    NA      NA
#  7:    7      24         7   556       2
#  8:    8      23         7   556       2
#  9:    9      23         7   556       2
# 10:   10      22        NA    NA      NA

使用的数据:

# Sample inputs for the examples. fread() (from data.table) is given each table
# as inline text instead of a file path.

# Readings: one row per time step.
df_a <- fread('
Time Reading
1      20 
2      23
3      25
4      22
5      24
6      23 
7      24
8      23
9      23
10     22
')

# Alarms: one row per alarm occurrence, with the first and last time step it
# covers and its alarm code.
df_b <- fread('
TimeStart TimeEnd Alarm
 2            5     556
 7            9     556
')
相关问题