我有计数数据(Pup列),需要将其转换为二元(存在/不存在)记录。我想根据计数为每条数据生成相应数量的存在记录,例如,如果某一行的计数为7,则我想创建7条存在记录,同时重复并保留该行的所有关联数据。
我需要对多个数据集执行此操作,因此想创建一个函数来完成。
Pup(count) Year Survey.Typ File.Name ice.ras.va bath.ras.v
2 2008 Air asi-n3125-20080223.tif 200 -32.0000
Here is an example of the data (I would like to include all the columns):
structure(list(number = c(69L, 28L, 50L, 49L, 34L, 19L), siteID = structure(c(69L,
28L, 50L, 49L, 34L, 19L), .Label = c("Point-001", "Point-002",
"Point-003", "Point-004", "Point-005", "Point-006", "Point-007",
"Point-008", "Point-009", "Point-010", "Point-011", "Point-012",
"Point-013", "Point-014", "Point-015", "Point-016", "Point-017",
"Point-018", "Point-019", "Point-020", "Point-021", "Point-022",
"Point-023", "Point-024", "Point-025", "Point-026", "Point-027",
"Point-028", "Point-029", "Point-030", "Point-031", "Point-032",
"Point-033", "Point-034", "Point-035", "Point-036", "Point-037",
"Point-038", "Point-039", "Point-040", "Point-041", "Point-042",
"Point-043", "Point-044", "Point-045", "Point-046", "Point-047",
"Point-048", "Point-049", "Point-050", "Point-051", "Point-052",
"Point-053", "Point-054", "Point-055", "Point-056", "Point-057",
"Point-058", "Point-059", "Point-060", "Point-061", "Point-062",
"Point-063", "Point-064", "Point-065", "Point-066", "Point-067",
"Point-068", "Point-069", "Point-070", "Point-071", "Point-072",
"Point-073", "Point-074", "Point-075", "Point-076", "Point-077",
"Point-078", "Point-079", "Point-080", "Point-081", "Point-082",
"Point-083", "Point-084", "Point-085", "Point-086", "Point-087",
"Point-088", "Point-089", "Point-090", "Point-091", "Point-092",
"Point-093", "Point-094", "Point-095", "Point-096", "Point-097",
"Point-098", "Point-099", "Point-100", "Point-101", "Point-102",
"Point-103", "Point-104"), class = "factor"), xcoord = c(-135685,
-88531.6, -65340, -88665.8, -73700.1, -73921.4), ycoord = c(-4550119L,
-4519471L, -4487622L, -4521847L, -4511036L, -4527001L), mdcaty = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "Equal", class = "factor"), wgt = c(3.035714,
8.727273, 5.95, 5.95, 5.95, 3), stratum = structure(c(4L, 2L,
3L, 3L, 3L, 1L), .Label = c("(100,120]", "(120,140]", "(140,160]",
"(160,180]", "(180,200]", "(20,40]", "(40,60]", "(60,80]", "(80,100]",
"[0,20]"), class = "factor"), panel = structure(c(1L, 1L, 1L,
1L, 1L, 1L), .Label = "PanelOne", class = "factor"), EvalStatus = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = "NotEval", class = "factor"), EvalReason = c(NA,
NA, NA, NA, NA, NA), Photo = structure(c(34L, 13L, 72L, 15L,
75L, 76L), .Label = c("IMG_0339", "IMG_0349", "IMG_0376", "IMG_0390",
"IMG_0413", "IMG_0419", "IMG_0458", "IMG_0491", "IMG_0512", "IMG_0514",
"IMG_0532", "IMG_0537", "IMG_0592", "IMG_0619", "IMG_0631", "IMG_0651",
"IMG_0660", "IMG_0692", "IMG_0699", "IMG_0715", "IMG_0728", "IMG_0743",
"IMG_0782", "IMG_0796", "IMG_0819", "IMG_0838", "IMG_0862", "IMG_0865",
"IMG_0883", "IMG_0890", "IMG_0905", "IMG_0908", "IMG_0912", "IMG_0928",
"IMG_0934", "IMG_0937", "IMG_0948", "IMG_0977", "IMG_1003", "IMG_1009",
"IMG_1030", "IMG_1060", "IMG_1275", "IMG_1278", "IMG_1286", "IMG_1337",
"IMG_1342", "IMG_1361", "IMG_1365", "IMG_1368", "IMG_1395", "IMG_1398",
"IMG_1406", "IMG_1414", "IMG_1426", "IMG_1427", "IMG_1430", "IMG_1449",
"IMG_1451", "IMG_1454", "IMG_1470", "IMG_1486", "IMG_1501", "IMG_4425",
"IMG_4428", "IMG_4430", "IMG_4431", "IMG_4440", "IMG_4444", "IMG_4446",
"IMG_4448", "IMG_4464", "IMG_4477", "IMG_4485", "IMG_4493", "IMG_4504",
"IMG_4528", "IMG_4529", "IMG_4555", "IMG_4560", "IMG_4588", "IMG_4590",
"IMG_4601", "IMG_4625", "IMG_4673", "IMG_4688", "IMG_4705", "IMG_4709",
"IMG_4714", "IMG_4717", "IMG_4720", "IMG_4722", "IMG_4731", "IMG_4735",
"IMG_4737", "IMG_4739", "IMG_4742", "IMG_4746", "IMG_4747", "IMG_4755",
"IMG_4761", "IMG_4766", "IMG_4773", "IMG_4780"), class = "factor"),
Date = c(14647L, 14647L, 14647L, 14647L, 14647L, 14647L),
Time = structure(c(5L, 5L, 5L, 5L, 5L, 5L), .Label = c("05:00-07:30",
"05:20-07:30", "05:30-07:30", "06:00-07:30", "07:30-11:00",
"10:30-12:30"), class = "factor"), Adult = c(3L, 4L, 4L,
4L, 6L, 7L), Pair = c(1L, 0L, 1L, 5L, 1L, 0L), Pup = c(0L,
0L, 5L, 1L, 0L, 0L), Year = c(2010L, 2010L, 2010L, 2010L,
2010L, 2010L), Survey.Typ = structure(c(1L, 1L, 1L, 1L, 1L,
1L), .Label = "Air", class = "factor"), File.Name = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("asi-n3125-20100207.tif",
"asi-n3125-20100213.tif", "asi-n3125-20100214.tif", "asi-n3125-20100215.tif",
"asi-n3125-20100216.tif", "asi-n3125-20100218.tif"), class = "factor"),
ice.ras.va = c(162.5, 140, 151, 142.5, 151.25, 107.6667),
bath.ras.v = c(-38.8758, -39.0064, -32, -39, -37.0382, -40.0011
), water.dist = c(11048.54, 17258.38, 39836.09, 14486.11,
22152.51, 6893.682), shore.dist = c(91652.52, 95836.59, 71211.21,
98830.29, 90601.87, 99383.33), coords.x1 = c(-135685, -88531.6,
-65340, -88665.8, -73700.1, -73921.4), coords.x2 = c(-4550119L,
-4519471L, -4487622L, -4521847L, -4511036L, -4527001L), optional = c(TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE), Thick.Ice = c(FALSE, TRUE,
TRUE, TRUE, TRUE, TRUE), Thin.Ice = c(FALSE, FALSE, TRUE,
TRUE, TRUE, FALSE), Pancake.ice = c(FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE), Dense.Brash = c(FALSE, FALSE, TRUE,
TRUE, TRUE, FALSE), Loose.Brash = c(TRUE, FALSE, TRUE, FALSE,
FALSE, FALSE), Thin.Ridges = c(FALSE, FALSE, TRUE, FALSE,
FALSE, FALSE), Thick.Ridges = c(FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE), Cracks.Leads = c(FALSE, TRUE, FALSE, FALSE,
TRUE, FALSE), Poliynas = c(FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE), Open.Water = c(TRUE, FALSE, FALSE, FALSE, FALSE,
TRUE), Seal.breathing.hole = c(FALSE, FALSE, TRUE, TRUE,
FALSE, FALSE), Blood = c(FALSE, FALSE, TRUE, FALSE, TRUE,
FALSE), X = structure(c(1L, 1L, 1L, 1L, 1L, 10L), .Label = c("",
"BREETHING HOLE HARD TO SEE", "CHECK - CENTRAL REGION OF THICKER ICE",
"DAMAGED IMAGE", "HARD TO DISTINGUISH IF SEAL AND BREATHING HOLE PRESENT",
"ICE OR SNOW - CHECK", "MOSAIC", "NO EVIDENCE OF BREATHING HOLE",
"PRESENCE OF ICE DRIFTS (SMALL)", "SEAL PRESENT ON A SMALL ISLAND OF ICE",
"SNOW DRIFTS"), class = "factor")), row.names = 99:104, class = "data.frame")
我希望得到一个根据Pup列显示存在和不存在的数据框:如果某个站点的计数大于1,则该站点应有同样数量的存在记录。
预期结果是:
Pup(present) Year Survey.Typ File.Name ice.ras.va bath.ras.v
1 2008 Air asi-n3125-20080223.tif 200 -32.0000
1 2008 Air asi-n3125-20080223.tif 200 -32.0000
答案 0 :(得分:0)
一种解决方案是在单元格中创建一个“存在”事件列表,然后使用 tidyr 包中的 unnest 取消嵌套该列表,即展开 data.frame。
library("dplyr")

# Making up some data: one row per pup record, with a count of animals seen.
# `sample()` and `rpois()` are random, so the values printed below are just
# one possible draw and will differ from run to run.
df <- data.frame(pup_id = 1:10,
                 metadata = sample(LETTERS, 10),
                 count = rpois(10, 3) + 1L)
#    pup_id metadata count
# 1       1        M     2
# 2       2        X     6
# 3       3        O     3
# 4       4        Z     6
# 5       5        H     7
# 6       6        U     1
# 7       7        L     2
# 8       8        P     5
# 9       9        Q     6
# 10     10        S     8

# Turn a single count into its presence/absence records:
#   * n > 0 -> n presence records (1)
#   * n = 0 -> one absence record (0); an empty vector would make
#              `tidyr::unnest()` drop the row altogether
#   * NA    -> NA, so rows with an unknown count are kept instead of failing
presence_records <- function(n) {
  if (is.na(n)) {
    NA_real_
  } else if (n > 0) {
    rep(1, n)
  } else {
    0
  }
}

# Create a vector of presence for each pup.
# `lapply()` walks `count` row by row, so no grouping is needed: this does not
# rely on `pup_id` being unique and leaves no groups behind on the result.
df_with_presence_list <-
  df %>%
  as_tibble() %>%
  mutate(presence = lapply(count, presence_records))

# This has added a list-column that we can now unnest
# You can do it all in one step but it was to show you
df_with_presence <-
  df_with_presence_list %>%
  tidyr::unnest(presence)
# # A tibble: 46 x 4
#    pup_id metadata count presence
#     <int> <fct>    <int>    <dbl>
#  1      1 M            2        1
#  2      1 M            2        1
#  3      2 X            6        1
#  4      2 X            6        1
#  5      2 X            6        1
#  6      2 X            6        1
#  7      2 X            6        1
#  8      2 X            6        1
#  9      3 O            3        1
# 10      3 O            3        1
# # … with 36 more rows