Question

我有计数数据（Pup列），需要将其转换为二进制记录。我想根据计数数据创建二进制记录，例如，如果我的计数为7，则我想创建7个二进制（存在）记录，同时重复并保留所有关联数据。

我需要对多个数据集执行此操作，因此想编写一个函数来完成。

Pup(count) Year  Survey.Typ   File.Name                     ice.ras.va
2          2008    Air        asi-n3125-20080223.tif 200    -32.0000

以下是数据示例（我希望保留所有列）：
structure(list(number = c(69L, 28L, 50L, 49L, 34L, 19L), siteID = structure(c(69L, 
28L, 50L, 49L, 34L, 19L), .Label = c("Point-001", "Point-002", 
"Point-003", "Point-004", "Point-005", "Point-006", "Point-007", 
"Point-008", "Point-009", "Point-010", "Point-011", "Point-012", 
"Point-013", "Point-014", "Point-015", "Point-016", "Point-017", 
"Point-018", "Point-019", "Point-020", "Point-021", "Point-022", 
"Point-023", "Point-024", "Point-025", "Point-026", "Point-027", 
"Point-028", "Point-029", "Point-030", "Point-031", "Point-032", 
"Point-033", "Point-034", "Point-035", "Point-036", "Point-037", 
"Point-038", "Point-039", "Point-040", "Point-041", "Point-042", 
"Point-043", "Point-044", "Point-045", "Point-046", "Point-047", 
"Point-048", "Point-049", "Point-050", "Point-051", "Point-052", 
"Point-053", "Point-054", "Point-055", "Point-056", "Point-057", 
"Point-058", "Point-059", "Point-060", "Point-061", "Point-062", 
"Point-063", "Point-064", "Point-065", "Point-066", "Point-067", 
"Point-068", "Point-069", "Point-070", "Point-071", "Point-072", 
"Point-073", "Point-074", "Point-075", "Point-076", "Point-077", 
"Point-078", "Point-079", "Point-080", "Point-081", "Point-082", 
"Point-083", "Point-084", "Point-085", "Point-086", "Point-087", 
"Point-088", "Point-089", "Point-090", "Point-091", "Point-092", 
"Point-093", "Point-094", "Point-095", "Point-096", "Point-097", 
"Point-098", "Point-099", "Point-100", "Point-101", "Point-102", 
"Point-103", "Point-104"), class = "factor"), xcoord = c(-135685, 
-88531.6, -65340, -88665.8, -73700.1, -73921.4), ycoord = c(-4550119L, 
-4519471L, -4487622L, -4521847L, -4511036L, -4527001L), mdcaty = structure(c(1L, 
1L, 1L, 1L, 1L, 1L), .Label = "Equal", class = "factor"), wgt = c(3.035714, 
8.727273, 5.95, 5.95, 5.95, 3), stratum = structure(c(4L, 2L, 
3L, 3L, 3L, 1L), .Label = c("(100,120]", "(120,140]", "(140,160]", 
"(160,180]", "(180,200]", "(20,40]", "(40,60]", "(60,80]", "(80,100]", 
"[0,20]"), class = "factor"), panel = structure(c(1L, 1L, 1L, 
1L, 1L, 1L), .Label = "PanelOne", class = "factor"), EvalStatus = structure(c(1L, 
1L, 1L, 1L, 1L, 1L), .Label = "NotEval", class = "factor"), EvalReason = c(NA, 
NA, NA, NA, NA, NA), Photo = structure(c(34L, 13L, 72L, 15L, 
75L, 76L), .Label = c("IMG_0339", "IMG_0349", "IMG_0376", "IMG_0390", 
"IMG_0413", "IMG_0419", "IMG_0458", "IMG_0491", "IMG_0512", "IMG_0514", 
"IMG_0532", "IMG_0537", "IMG_0592", "IMG_0619", "IMG_0631", "IMG_0651", 
"IMG_0660", "IMG_0692", "IMG_0699", "IMG_0715", "IMG_0728", "IMG_0743", 
"IMG_0782", "IMG_0796", "IMG_0819", "IMG_0838", "IMG_0862", "IMG_0865", 
"IMG_0883", "IMG_0890", "IMG_0905", "IMG_0908", "IMG_0912", "IMG_0928", 
"IMG_0934", "IMG_0937", "IMG_0948", "IMG_0977", "IMG_1003", "IMG_1009", 
"IMG_1030", "IMG_1060", "IMG_1275", "IMG_1278", "IMG_1286", "IMG_1337", 
"IMG_1342", "IMG_1361", "IMG_1365", "IMG_1368", "IMG_1395", "IMG_1398", 
"IMG_1406", "IMG_1414", "IMG_1426", "IMG_1427", "IMG_1430", "IMG_1449", 
"IMG_1451", "IMG_1454", "IMG_1470", "IMG_1486", "IMG_1501", "IMG_4425", 
"IMG_4428", "IMG_4430", "IMG_4431", "IMG_4440", "IMG_4444", "IMG_4446", 
"IMG_4448", "IMG_4464", "IMG_4477", "IMG_4485", "IMG_4493", "IMG_4504", 
"IMG_4528", "IMG_4529", "IMG_4555", "IMG_4560", "IMG_4588", "IMG_4590", 
"IMG_4601", "IMG_4625", "IMG_4673", "IMG_4688", "IMG_4705", "IMG_4709", 
"IMG_4714", "IMG_4717", "IMG_4720", "IMG_4722", "IMG_4731", "IMG_4735", 
"IMG_4737", "IMG_4739", "IMG_4742", "IMG_4746", "IMG_4747", "IMG_4755", 
"IMG_4761", "IMG_4766", "IMG_4773", "IMG_4780"), class = "factor"), 
    Date = c(14647L, 14647L, 14647L, 14647L, 14647L, 14647L), 
    Time = structure(c(5L, 5L, 5L, 5L, 5L, 5L), .Label = c("05:00-07:30", 
    "05:20-07:30", "05:30-07:30", "06:00-07:30", "07:30-11:00", 
    "10:30-12:30"), class = "factor"), Adult = c(3L, 4L, 4L, 
    4L, 6L, 7L), Pair = c(1L, 0L, 1L, 5L, 1L, 0L), Pup = c(0L, 
    0L, 5L, 1L, 0L, 0L), Year = c(2010L, 2010L, 2010L, 2010L, 
    2010L, 2010L), Survey.Typ = structure(c(1L, 1L, 1L, 1L, 1L, 
    1L), .Label = "Air", class = "factor"), File.Name = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L), .Label = c("asi-n3125-20100207.tif", 
    "asi-n3125-20100213.tif", "asi-n3125-20100214.tif", "asi-n3125-20100215.tif", 
    "asi-n3125-20100216.tif", "asi-n3125-20100218.tif"), class = "factor"), 
    ice.ras.va = c(162.5, 140, 151, 142.5, 151.25, 107.6667), 
    bath.ras.v = c(-38.8758, -39.0064, -32, -39, -37.0382, -40.0011
    ), water.dist = c(11048.54, 17258.38, 39836.09, 14486.11, 
    22152.51, 6893.682), shore.dist = c(91652.52, 95836.59, 71211.21, 
    98830.29, 90601.87, 99383.33), coords.x1 = c(-135685, -88531.6, 
    -65340, -88665.8, -73700.1, -73921.4), coords.x2 = c(-4550119L, 
    -4519471L, -4487622L, -4521847L, -4511036L, -4527001L), optional = c(TRUE, 
    TRUE, TRUE, TRUE, TRUE, TRUE), Thick.Ice = c(FALSE, TRUE, 
    TRUE, TRUE, TRUE, TRUE), Thin.Ice = c(FALSE, FALSE, TRUE, 
    TRUE, TRUE, FALSE), Pancake.ice = c(FALSE, FALSE, FALSE, 
    FALSE, FALSE, FALSE), Dense.Brash = c(FALSE, FALSE, TRUE, 
    TRUE, TRUE, FALSE), Loose.Brash = c(TRUE, FALSE, TRUE, FALSE, 
    FALSE, FALSE), Thin.Ridges = c(FALSE, FALSE, TRUE, FALSE, 
    FALSE, FALSE), Thick.Ridges = c(FALSE, FALSE, FALSE, FALSE, 
    FALSE, FALSE), Cracks.Leads = c(FALSE, TRUE, FALSE, FALSE, 
    TRUE, FALSE), Poliynas = c(FALSE, FALSE, FALSE, FALSE, FALSE, 
    FALSE), Open.Water = c(TRUE, FALSE, FALSE, FALSE, FALSE, 
    TRUE), Seal.breathing.hole = c(FALSE, FALSE, TRUE, TRUE, 
    FALSE, FALSE), Blood = c(FALSE, FALSE, TRUE, FALSE, TRUE, 
    FALSE), X = structure(c(1L, 1L, 1L, 1L, 1L, 10L), .Label = c("", 
    "BREETHING HOLE HARD TO SEE", "CHECK - CENTRAL REGION OF THICKER ICE", 
    "DAMAGED IMAGE", "HARD TO DISTINGUISH IF SEAL AND BREATHING HOLE PRESENT", 
    "ICE OR SNOW - CHECK", "MOSAIC", "NO EVIDENCE OF BREATHING HOLE", 
    "PRESENCE OF ICE DRIFTS (SMALL)", "SEAL PRESENT ON A SMALL ISLAND OF ICE", 
    "SNOW DRIFTS"), class = "factor")), row.names = 99:104, class = "data.frame")

我希望得到一个数据框，其中Pup列以二进制形式表示存在与不存在；如果某个站点的计数大于1，则该站点应对应多条“存在”记录。

预期结果是：

Pup(present) Year  Survey.Typ   File.Name                     ice.ras.va
1            2008    Air        asi-n3125-20080223.tif 200    -32.0000
1            2008    Air        asi-n3125-20080223.tif 200    -32.0000

Answer 1

一种解决方案是在单元格中创建一个“存在”事件列表（列表列），然后使用tidyr包中的unnest函数取消嵌套该列表，即展开data.frame。

library("dplyr")

# Simulate a small example data set: ten pups, each with a randomly drawn
# letter as metadata and a Poisson draw shifted up by one as its count
# (so every simulated count is at least 1).
df <- data.frame(
  pup_id = seq_len(10),
  metadata = sample(LETTERS, size = 10),
  count = 1L + rpois(n = 10, lambda = 3)
)

#    pup_id metadata count
# 1       1        M     2
# 2       2        X     6
# 3       3        O     3
# 4       4        Z     6
# 5       5        H     7
# 6       6        U     1
# 7       7        L     2
# 8       8        P     5
# 9       9        Q     6
# 10     10        S     8

# Expand a single count into its binary records:
#   * `n` ones when at least one pup was counted,
#   * a single 0 (absence) when the count is zero, so the row is not dropped
#     by unnest() (which discards rows whose list element is empty),
#   * NA when the count itself is missing.
# A negative count is left to fail inside rep(), as it did before.
expand_presence <- function(n) {
  if (is.na(n)) {
    NA_integer_
  } else if (n == 0) {
    0L
  } else {
    rep(1L, n)
  }
}

# Create a vector of presence records for each row.
# Working element-wise on `count` means no grouping is needed, so this also
# works when the id column is not unique and there is nothing to ungroup.
df_with_presence_list <-
  df %>%
  as_tibble() %>%
  mutate(presence = lapply(count, expand_presence))

# This has added a list-column that we can now unnest
# You can do it all in one step; it is split here to show the intermediate

df_with_presence <-
  df_with_presence_list %>%
  tidyr::unnest(presence)

# All other columns are repeated alongside each presence record.
# (A row whose count is 0 would appear exactly once, with presence = 0.)
#
#   # A tibble: 46 x 4
#   pup_id metadata count presence
#    <int> <fct>    <int>    <int>
# 1      1 M            2        1
# 2      1 M            2        1
# 3      2 X            6        1
# 4      2 X            6        1
# 5      2 X            6        1
# 6      2 X            6        1
# 7      2 X            6        1
# 8      2 X            6        1
# 9      3 O            3        1
# 10     3 O            3        1
# # … with 36 more rows

将计数数据转换为许多二进制记录

1 个答案: