数据框中的数据填充基于R中的不同列和行选择

时间:2017-04-11 06:59:26

标签: r dataframe

我有以下输入数据:

Type    Start       End         Value
Normal  14:10:01    14:20:00    0.05454545
Normal  14:50:01    15:00:00    0.05
Normal  15:00:01    15:10:00    0.056
Normal  15:10:01    15:20:00    0.05121951
Normal  15:20:01    15:30:00    0.05
Spl1    13:20:01    13:30:00    0.05089286
Spl1    13:40:01    13:50:00    0.05
Spl1    13:50:01    14:00:00    0.04848485
Spl2    14:30:01    14:40:00    0.05208333
Spl2    14:50:01    15:00:00    0.05
Spl2    15:20:01    15:30:00    0.05

输入表的数据结构:

 $ Type                 : chr
 $ Start                : chr
 $ End                  : chr
 $ Value                : num

输出表结构如下:

Start       End       Normal    Spl1    Spl2
13:00:01    13:10:00            
13:10:01    13:20:00            
13:20:01    13:30:00            
13:30:01    13:40:00            
13:40:01    13:50:00            
13:50:01    14:00:00            
14:00:01    14:10:00            
14:10:01    14:20:00            
14:20:01    14:30:00            
14:30:01    14:40:00            
14:40:01    14:50:00            
14:50:01    15:00:00            
15:00:01    15:10:00            
15:10:01    15:20:00            
15:20:01    15:30:00            

我希望根据以下标准填充数据:

  1. (Input_table$Start == Output_table$Start) && (Input_table$End == Output_table$End)
  2. 输入表中 "Normal" 类型的行应将其 Value 填充到输出表的 Normal 列。同样,"Spl1" 和 "Spl2" 类型的行应分别填充到输出表的 Spl1 和 Spl2 列
  3. 输出表格中的空白字段应填充0
  4. 所需的输出表应如下所示:

    Start       End         Normal      Spl1        Spl2
    13:00:01    13:10:00    0           0           0
    13:10:01    13:20:00    0           0           0
    13:20:01    13:30:00    0           0.05089286  0
    13:30:01    13:40:00    0           0           0
    13:40:01    13:50:00    0           0.05        0
    13:50:01    14:00:00    0           0.04848485  0
    14:00:01    14:10:00    0           0           0
    14:10:01    14:20:00    0.05454545  0           0
    14:20:01    14:30:00    0           0           0
    14:30:01    14:40:00    0           0           0.05208333
    14:40:01    14:50:00    0           0           0
    14:50:01    15:00:00    0.05        0           0.05
    15:00:01    15:10:00    0.056       0           0
    15:10:01    15:20:00    0.05121951  0           0
    15:20:01    15:30:00    0.05        0           0.05
    

请建议在 R 中可行的解决方案。

注意:我不想在此解决方案中使用循环,而是希望找到一种直接(向量化)的方法。如果有,请给出建议。

2 个答案:

答案 0 :(得分:1)

我们可以先用 left_join 把两个表按 Start 和 End 连接起来,然后把结果转换为“宽”格式(每个 Type 占一列),最后把缺失值替换为 0

library(dplyr)
library(tidyr)
# Build the wide slot-by-type table:
#   1. keep every time slot from df2 and attach matching rows of df1
#      (slots with no measurement get Type = NA, Value = NA);
#   2. turn each Type into its own column, filling absent slot/type
#      combinations with 0;
#   3. keep only the slot keys and the real type columns, which drops the
#      helper "NA" column created by unmatched slots.
left_join(df2, df1, by = c("Start", "End")) %>%
  pivot_wider(
    names_from = Type,
    values_from = Value,
    values_fill = 0
  ) %>%
  select(Start, End, Normal, Spl1, Spl2)
#     Start      End     Normal       Spl1       Spl2
#1  13:00:01 13:10:00 0.00000000 0.00000000 0.00000000
#2  13:10:01 13:20:00 0.00000000 0.00000000 0.00000000
#3  13:20:01 13:30:00 0.00000000 0.05089286 0.00000000
#4  13:30:01 13:40:00 0.00000000 0.00000000 0.00000000
#5  13:40:01 13:50:00 0.00000000 0.05000000 0.00000000
#6  13:50:01 14:00:00 0.00000000 0.04848485 0.00000000
#7  14:00:01 14:10:00 0.00000000 0.00000000 0.00000000
#8  14:10:01 14:20:00 0.05454545 0.00000000 0.00000000
#9  14:20:01 14:30:00 0.00000000 0.00000000 0.00000000
#10 14:30:01 14:40:00 0.00000000 0.00000000 0.05208333
#11 14:40:01 14:50:00 0.00000000 0.00000000 0.00000000
#12 14:50:01 15:00:00 0.05000000 0.00000000 0.05000000
#13 15:00:01 15:10:00 0.05600000 0.00000000 0.00000000
#14 15:10:01 15:20:00 0.05121951 0.00000000 0.00000000
#15 15:20:01 15:30:00 0.05000000 0.00000000 0.05000000

数据

 # Long-format input: one row per (Type, Start, End) measurement.
 # Times are kept as "HH:MM:SS" character strings; Value is numeric.
 df1 <- data.frame(
   Type = rep(c("Normal", "Spl1", "Spl2"), times = c(5L, 3L, 3L)),
   Start = c(
     "14:10:01", "14:50:01", "15:00:01", "15:10:01", "15:20:01",
     "13:20:01", "13:40:01", "13:50:01",
     "14:30:01", "14:50:01", "15:20:01"
   ),
   End = c(
     "14:20:00", "15:00:00", "15:10:00", "15:20:00", "15:30:00",
     "13:30:00", "13:50:00", "14:00:00",
     "14:40:00", "15:00:00", "15:30:00"
   ),
   Value = c(
     0.05454545, 0.05, 0.056, 0.05121951, 0.05,
     0.05089286, 0.05, 0.04848485,
     0.05208333, 0.05, 0.05
   ),
   stringsAsFactors = FALSE
 )

   # Output skeleton: the full list of 10-minute slots (13:00:01 - 15:30:00)
   # that must appear in the result, whether or not df1 has data for them.
   df2 <- data.frame(
     Start = c(
       "13:00:01", "13:10:01", "13:20:01", "13:30:01", "13:40:01",
       "13:50:01", "14:00:01", "14:10:01", "14:20:01", "14:30:01",
       "14:40:01", "14:50:01", "15:00:01", "15:10:01", "15:20:01"
     ),
     End = c(
       "13:10:00", "13:20:00", "13:30:00", "13:40:00", "13:50:00",
       "14:00:00", "14:10:00", "14:20:00", "14:30:00", "14:40:00",
       "14:50:00", "15:00:00", "15:10:00", "15:20:00", "15:30:00"
     ),
     stringsAsFactors = FALSE
   )

答案 1 :(得分:1)

这可以用 data.table 在一行代码中完成:
# dt1[dt2, on = ...] keeps every slot of dt2 and attaches matching dt1 rows;
# slots without a measurement get Type = NA. dcast() then makes one column
# per Type, filling absent slot/type combinations with 0. The value column
# is named explicitly so the result does not depend on dcast() guessing it
# from column order. Unmatched slots create a helper column literally named
# `NA`, which is removed by reference; the trailing [] prints the result.
dcast(
  dt1[dt2, on = c("Start", "End")],
  Start + End ~ Type,
  value.var = "Value",
  fill = 0
)[, `NA` := NULL][]

将返回

       Start      End     Normal       Spl1       Spl2
 1: 13:00:01 13:10:00 0.00000000 0.00000000 0.00000000
 2: 13:10:01 13:20:00 0.00000000 0.00000000 0.00000000
 3: 13:20:01 13:30:00 0.00000000 0.05089286 0.00000000
 4: 13:30:01 13:40:00 0.00000000 0.00000000 0.00000000
 5: 13:40:01 13:50:00 0.00000000 0.05000000 0.00000000
 6: 13:50:01 14:00:00 0.00000000 0.04848485 0.00000000
 7: 14:00:01 14:10:00 0.00000000 0.00000000 0.00000000
 8: 14:10:01 14:20:00 0.05454545 0.00000000 0.00000000
 9: 14:20:01 14:30:00 0.00000000 0.00000000 0.00000000
10: 14:30:01 14:40:00 0.00000000 0.00000000 0.05208333
11: 14:40:01 14:50:00 0.00000000 0.00000000 0.00000000
12: 14:50:01 15:00:00 0.05000000 0.00000000 0.05000000
13: 15:00:01 15:10:00 0.05600000 0.00000000 0.00000000
14: 15:10:01 15:20:00 0.05121951 0.00000000 0.00000000
15: 15:20:01 15:30:00 0.05000000 0.00000000 0.05000000

数据

library(data.table)
# Long-format input table, read from an inline whitespace-aligned text
# literal: one row per (Type, Start, End) measurement with its Value.
# NOTE(review): column separator and column types are left to fread()'s
# auto-detection; Start/End are expected to stay character - confirm.
dt1 <- fread(
"Type    Start       End         Value
Normal  14:10:01    14:20:00    0.05454545
Normal  14:50:01    15:00:00    0.05
Normal  15:00:01    15:10:00    0.056
Normal  15:10:01    15:20:00    0.05121951
Normal  15:20:01    15:30:00    0.05
Spl1    13:20:01    13:30:00    0.05089286
Spl1    13:40:01    13:50:00    0.05
Spl1    13:50:01    14:00:00    0.04848485
Spl2    14:30:01    14:40:00    0.05208333
Spl2    14:50:01    15:00:00    0.05
Spl2    15:20:01    15:30:00    0.05"
)

# Output skeleton, read from an inline text literal: the complete list of
# 10-minute (Start, End) slots that must appear in the final table.
# NOTE(review): the literal carries trailing blanks on each line; this
# relies on fread() ignoring them when detecting the two columns - confirm.
dt2 <- fread(
  "Start       End     
13:00:01    13:10:00            
13:10:01    13:20:00            
13:20:01    13:30:00            
13:30:01    13:40:00            
13:40:01    13:50:00            
13:50:01    14:00:00            
14:00:01    14:10:00            
14:10:01    14:20:00            
14:20:01    14:30:00            
14:30:01    14:40:00            
14:40:01    14:50:00            
14:50:01    15:00:00            
15:00:01    15:10:00            
15:10:01    15:20:00            
15:20:01    15:30:00   "
)