为缺失的试验创建新行:保留标识符(id),并将结果设为 NA

时间:2017-12-11 09:43:49

标签: r dplyr tidyverse

我有一个数据框,如下图所示:

   id day  time trial outcome
1   1   1 15:10     2   FALSE
2   1   2 15:01     2    TRUE
3   1   2 20:10     3   FALSE
4   1   3 11:10     1   FALSE
5   1   4 15:10     2   FALSE
6   1   5 20:13     3   FALSE
7   1   6 11:10     1   FALSE
8   1   6 15:10     2   FALSE
9   1   7 11:10     1   FALSE
10  1   7 15:09     2    TRUE
11  1   7 20:00     3    TRUE
12  1   8 11:10     1   FALSE
13  1   8 15:01     2    TRUE
14  1   9 15:00     2    TRUE
15  1   9 20:06     3    TRUE
16  1  10 11:10     1   FALSE
17  1  11 11:10     1   FALSE
18  1  11 15:00     2    TRUE
19  1  12 20:00     3    TRUE
20  1  13 15:02     2    TRUE

参与者每天接受 3 次试验(trial)。outcome 表示他们是否看了该次试验。如果他们没看,则不会记录对应的数据行。我想做的是为那些缺失的试验创建行。

所以在上面的例子中,第 1 天需要添加试验 1 和 3:即 id = 1、day = 1、trial 分别为 1 和 3 的两行,其 time 和 outcome 为 NA。第 2 天只需要添加试验 1:即 id = 1、day = 2、trial = 1 的一行,其 time 和 outcome 同样为 NA。

我一直在玩,但一直找不到可靠的解决方案。

可重复数据集:

# Literal for the example data: a data.frame with 20 rows and the columns
# id, day, time, trial and outcome.
#   - id is a factor: all 20 values are code 1 (label "1"); the .Label vector
#     declares 96 levels ("1" .. "96"), the other 95 are unused here.
#   - outcome is a factor with the levels "FALSE" and "TRUE".
# The expression is not assigned to a name here; bind it to a variable
# (e.g. df <- structure(...)) before using it.
structure(list(id = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", 
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", 
"36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", 
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57", 
"58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68", 
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", 
"80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90", 
"91", "92", "93", "94", "95", "96"), class = "factor"), day = c(1, 
2, 2, 3, 4, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 10, 11, 11, 12, 13), 
    time = c("15:10", "15:01", "20:10", "11:10", "15:10", "20:13", 
    "11:10", "15:10", "11:10", "15:09", "20:00", "11:10", "15:01", 
    "15:00", "20:06", "11:10", "11:10", "15:00", "20:00", "15:02"
    ), trial = c(2, 2, 3, 1, 2, 3, 1, 2, 1, 2, 3, 1, 2, 2, 3, 
    1, 1, 2, 3, 2), outcome = structure(c(1L, 2L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L
    ), .Label = c("FALSE", "TRUE"), class = "factor")), .Names = c("id", 
"day", "time", "trial", "outcome"), row.names = c(NA, 20L), class = "data.frame")

3 个答案:

答案 0 :(得分:1)

以下是使用merge的基础R的解决方案。

# Skeleton (template) data frame: one row for every id/day/trial combination
# that should exist -- each participant id present in `df`, every day from 1
# to the last recorded day, and every trial from 1 to the highest trial number.
df2 <- expand.grid(
    id = unique(df$id),
    day = seq_len(max(df$day)),
    trial = seq_len(max(df$trial)))

# Full outer join of the recorded rows with the skeleton. `id` is part of the
# join key together with day and trial: merging on c("day", "trial") alone
# leaves `id` as NA in the rows created for missing trials, whereas those rows
# should keep the participant id and have NA only in `time` and `outcome`.
df <- merge(df, df2, by = c("id", "day", "trial"), all = TRUE)
df
#   id day trial  time outcome
#1   1   1     1  <NA>    <NA>
#2   1   1     2 15:10   FALSE
#3   1   1     3  <NA>    <NA>
#4   1   2     1  <NA>    <NA>
#5   1   2     2 15:01    TRUE
#6   1   2     3 20:10   FALSE
#7   1   3     1 11:10   FALSE
#8   1   3     2  <NA>    <NA>
#9   1   3     3  <NA>    <NA>
#10  1   4     1  <NA>    <NA>
#11  1   4     2 15:10   FALSE
#12  1   4     3  <NA>    <NA>
#13  1   5     1  <NA>    <NA>
#14  1   5     2  <NA>    <NA>
#15  1   5     3 20:13   FALSE
#16  1   6     1 11:10   FALSE
#17  1   6     2 15:10   FALSE
#18  1   6     3  <NA>    <NA>
#19  1   7     1 11:10   FALSE
#20  1   7     2 15:09    TRUE
#21  1   7     3 20:00    TRUE
#22  1   8     1 11:10   FALSE
#23  1   8     2 15:01    TRUE
#24  1   8     3  <NA>    <NA>
#25  1   9     1  <NA>    <NA>
#26  1   9     2 15:00    TRUE
#27  1   9     3 20:06    TRUE
#28  1  10     1 11:10   FALSE
#29  1  10     2  <NA>    <NA>
#30  1  10     3  <NA>    <NA>
#31  1  11     1 11:10   FALSE
#32  1  11     2 15:00    TRUE
#33  1  11     3  <NA>    <NA>
#34  1  12     1  <NA>    <NA>
#35  1  12     2  <NA>    <NA>
#36  1  12     3 20:00    TRUE
#37  1  13     1  <NA>    <NA>
#38  1  13     2 15:02    TRUE
#39  1  13     3  <NA>    <NA>

说明:根据最大天数和最大试验编号构建骨架(模板)dataframe,然后使用 merge(..., by = ..., all = TRUE) 按键列(例如 day 和 trial)将骨架与原始 dataframe 合并,缺少的条目会以 NA 标记。

样本数据

# Sample data: builds `df`, a data.frame with 20 rows and the columns
# id, day, time, trial and outcome.
#   - id is a factor: all 20 values are code 1 (label "1"); the .Label vector
#     declares 96 levels ("1" .. "96"), the other 95 are unused here.
#   - day and trial are numeric, time is a character vector of "HH:MM" strings.
#   - outcome is a factor with the levels "FALSE" and "TRUE".
df <- structure(list(id = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
"36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46",
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57",
"58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68",
"69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79",
"80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90",
"91", "92", "93", "94", "95", "96"), class = "factor"), day = c(1,
2, 2, 3, 4, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 10, 11, 11, 12, 13),
    time = c("15:10", "15:01", "20:10", "11:10", "15:10", "20:13",
    "11:10", "15:10", "11:10", "15:09", "20:00", "11:10", "15:01",
    "15:00", "20:06", "11:10", "11:10", "15:00", "20:00", "15:02"
    ), trial = c(2, 2, 3, 1, 2, 3, 1, 2, 1, 2, 3, 1, 2, 2, 3,
    1, 1, 2, 3, 2), outcome = structure(c(1L, 2L, 1L, 1L, 1L,
    1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L
    ), .Label = c("FALSE", "TRUE"), class = "factor")), .Names = c("id",
"day", "time", "trial", "outcome"), row.names = c(NA, 20L), class = "data.frame")

答案 1 :(得分:1)

您可以使用 expand.grid() 获取所有可能的组合,然后用 merge 将其与原始 data.frame 合并。

# Example data rebuilt by hand: 20 recorded trials, all for participant "1"
# (as in the table shown in the question -- the values "1", "2", ... in the
# dput output are the factor's level set, not one id per row).
# stringsAsFactors = FALSE keeps id/time as character on every R version.
dat <- data.frame(id = rep("1", 20),
                  day = c(1, 2, 2, 3, 4, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 10, 11, 11, 12, 13),
                  time = c("15:10", "15:01", "20:10", "11:10", "15:10", "20:13",
                           "11:10", "15:10", "11:10", "15:09", "20:00", "11:10", "15:01",
                           "15:00", "20:06", "11:10", "11:10", "15:00", "20:00", "15:02"),
                  trial = c(2, 2, 3, 1, 2, 3, 1, 2, 1, 2, 3, 1, 2, 2, 3, 1, 1, 2, 3, 2),
                  outcome = structure(c(1L, 2L, 1L, 1L, 1L,  1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L),
                                      .Label = c("FALSE", "TRUE"), class = "factor"),
                  stringsAsFactors = FALSE)

# Every id/day/trial combination that should be present in the result.
# Only the key columns go into the grid: adding all-NA `time`/`outcome`
# columns here would make them part of the join key, nothing would match,
# and each recorded trial would show up twice (once as NA, once with data).
vals <- expand.grid(id = unique(dat$id),
                    day = unique(dat$day),
                    trial = unique(dat$trial),
                    stringsAsFactors = FALSE)

# Left join on the key columns: every grid row is kept, and `time`/`outcome`
# are filled from `dat` where a record exists and left as NA otherwise.
dat2 <- merge(vals, dat, by = c("id", "day", "trial"), all.x = TRUE)

答案 2 :(得分:1)

这正是 tidyr 包中 complete() 函数的用武之地,

complete