我有一个整洁的数据集my_durations
,其中数据框中的每种情况都对应于一段时间内获取的样本,如下所示:
> glimpse(my_durations)
Observations: 300
Variables: 5
$ sample_id <int> 2, 8, 25, 41, 59, 70, 98, 100, 105, 106, 108, 114, 119, 126,...
$ site_id <int> 2, 13, 12, 23, 47, 23, 66, 72, 72, 50, 50, 54, 45, 73, 48, 7...
$ start_date <dttm> 2015-04-12, 2015-06-10, 2015-07-02, 2015-07-22, 2015-07-29,...
$ end_date <dttm> 2015-05-14, 2015-06-18, 2015-07-08, 2015-07-24, 2015-07-30,...
$ duration <time> 32 days, 8 days, 6 days, 2 days, 1 days, 4 days, 12 days, 2...
sample_id
是该样本的唯一ID,site_id
只是用于跟踪样本采集位置的ID,start_date
和end_date
是样本的唯一ID。采样开始和结束,duration
只是start_date
和end_date
之间的时间差。
这是数据集的完整dput()
:
structure(list(sample_id = c(2L, 8L, 25L, 41L, 59L, 70L, 98L,
100L, 105L, 106L, 108L, 114L, 119L, 126L, 128L, 146L, 151L, 164L,
167L, 169L, 175L, 190L, 198L, 200L, 222L, 237L, 254L, 273L, 276L,
280L, 281L, 290L, 300L, 305L, 314L, 345L, 354L, 371L, 376L, 379L,
380L, 382L, 383L, 389L, 401L, 410L, 413L, 424L, 439L, 466L, 469L,
476L, 482L, 484L, 499L, 505L, 517L, 538L, 580L, 582L, 583L, 584L,
635L, 650L, 655L, 658L, 662L, 671L, 674L, 702L, 709L, 710L, 712L,
715L, 716L, 724L, 734L, 735L, 738L, 785L, 789L, 793L, 794L, 803L,
833L, 856L, 859L, 865L, 866L, 888L, 895L, 898L, 900L, 907L, 938L,
979L, 980L, 988L, 991L, 1009L, 1026L, 1031L, 1034L, 1050L, 1058L,
1061L, 1063L, 1066L, 1069L, 1077L, 1081L, 1091L, 1092L, 1099L,
1100L, 1108L, 1115L, 1119L, 1143L, 1149L, 1158L, 1180L, 1190L,
1195L, 1198L, 1207L, 1231L, 1234L, 1236L, 1242L, 1249L, 1250L,
1271L, 1288L, 1294L, 1311L, 1312L, 1313L, 1319L, 1337L, 1341L,
1345L, 1349L, 1360L, 1374L, 1379L, 1389L, 1393L, 1396L, 1401L,
1404L, 1407L, 1422L, 1434L, 1438L, 1448L, 1454L, 1463L, 1473L,
1489L, 1508L, 1514L, 1518L, 1531L, 1551L, 1564L, 1565L, 1571L,
1572L, 1597L, 1602L, 1619L, 1624L, 1629L, 1630L, 1659L, 1661L,
1666L, 1669L, 1672L, 1678L, 1690L, 1697L, 1700L, 1703L, 1707L,
1715L, 1719L, 1725L, 1732L, 1737L, 1739L, 1754L, 1771L, 1788L,
1790L, 1796L, 1799L, 1802L, 1805L, 1813L, 1814L, 1832L, 1839L,
1844L, 1848L, 1873L, 1887L, 1893L, 1900L, 1901L, 1917L, 1920L,
1939L, 1948L, 1954L, 1956L, 1968L, 1971L, 1975L, 1979L, 2008L,
2015L, 2019L, 2021L, 2028L, 2035L, 2048L, 2062L, 2071L, 2072L,
2075L, 2085L, 2090L, 2091L, 2100L, 2106L, 2115L, 2172L, 2178L,
2181L, 2221L, 2225L, 2228L, 2230L, 2231L, 2237L, 2241L, 2265L,
2266L, 2271L, 2282L, 2284L, 2311L, 2319L, 2337L, 2377L, 2405L,
2409L, 2412L, 2429L, 2434L, 2460L, 2483L, 2485L, 2488L, 2490L,
2500L, 2513L, 2520L, 2521L, 2527L, 2539L, 2555L, 2569L, 2599L,
2605L, 2610L, 2635L, 2640L, 2641L, 2656L, 2667L, 2689L, 2705L,
2720L, 2747L, 2753L, 2756L, 2761L, 2769L, 2772L, 2809L, 2816L,
2818L, 2821L, 2823L, 2828L, 2837L, 2838L), site_id = c(2L, 13L,
12L, 23L, 47L, 23L, 66L, 72L, 72L, 50L, 50L, 54L, 45L, 73L, 48L,
73L, 84L, 85L, 85L, 52L, 66L, 73L, 76L, 95L, 61L, 73L, 106L,
72L, 108L, 90L, 91L, 44L, 103L, 90L, 108L, 105L, 122L, 131L,
133L, 133L, 133L, 133L, 133L, 52L, 138L, 136L, 113L, 146L, 55L,
147L, 113L, 151L, 147L, 117L, 74L, 160L, 55L, 73L, 74L, 73L,
151L, 73L, 169L, 168L, 73L, 73L, 44L, 73L, 182L, 74L, 73L, 105L,
160L, 184L, 184L, 74L, 74L, 73L, 113L, 199L, 73L, 202L, 198L,
73L, 199L, 74L, 73L, 74L, 74L, 198L, 213L, 212L, 213L, 44L, 160L,
221L, 218L, 230L, 226L, 201L, 74L, 73L, 230L, 184L, 161L, 74L,
74L, 73L, 214L, 74L, 73L, 73L, 74L, 73L, 74L, 73L, 74L, 74L,
226L, 73L, 74L, 74L, 201L, 201L, 73L, 74L, 242L, 226L, 74L, 74L,
113L, 73L, 249L, 73L, 249L, 74L, 247L, 240L, 73L, 74L, 44L, 73L,
201L, 74L, 74L, 191L, 73L, 254L, 201L, 248L, 237L, 260L, 73L,
226L, 74L, 191L, 226L, 259L, 73L, 226L, 74L, 237L, 74L, 248L,
275L, 276L, 276L, 277L, 277L, 260L, 280L, 280L, 160L, 244L, 262L,
74L, 44L, 74L, 44L, 73L, 74L, 73L, 73L, 74L, 74L, 73L, 74L, 73L,
244L, 74L, 73L, 105L, 74L, 74L, 294L, 73L, 223L, 223L, 248L,
295L, 73L, 74L, 74L, 295L, 73L, 269L, 73L, 201L, 199L, 74L, 74L,
74L, 271L, 292L, 105L, 292L, 199L, 267L, 292L, 305L, 74L, 74L,
295L, 309L, 74L, 310L, 310L, 271L, 316L, 74L, 73L, 305L, 73L,
113L, 74L, 74L, 73L, 191L, 74L, 245L, 226L, 321L, 241L, 320L,
113L, 323L, 73L, 320L, 73L, 74L, 74L, 73L, 73L, 191L, 74L, 73L,
74L, 74L, 74L, 73L, 245L, 113L, 73L, 16L, 73L, 348L, 350L, 245L,
306L, 191L, 245L, 350L, 244L, 348L, 113L, 191L, 306L, 73L, 73L,
306L, 350L, 73L, 361L, 245L, 73L, 114L, 191L, 73L, 357L, 361L,
376L, 364L, 360L, 378L, 357L, 73L, 380L, 73L, 350L, 364L), start_date = structure(c(1428796800,
1433894400, 1435795200, 1437523200, 1438128000, 1438300800, 1437609600,
1438905600, 1438905600, 1438041600, 1438041600, 1438646400, 1438560000,
1439424000, 1438819200, 1440115200, 1439856000, 1440115200, 1440115200,
1438041600, 1440460800, 1441497600, 1440547200, 1441238400, 1438992000,
1442707200, 1443225600, 1439337600, 1440633600, 1442707200, 1442707200,
1444089600, 1442534400, 1444348800, 1443225600, 1444694400, 1445817600,
1445472000, 1446854400, 1446854400, 1446854400, 1446854400, 1446854400,
1441584000, 1447459200, 1447372800, 1444348800, 1447977600, 1448064000,
1448064000, 1446940800, 1449014400, 1448064000, 1445904000, 1449878400,
1449792000, 1449878400, 1451001600, 1452729600, 1452902400, 1452470400,
1452988800, 1453075200, 1454889600, 1455235200, 1455408000, 1454976000,
1455753600, 1453766400, 1456963200, 1457308800, 1456876800, 1456876800,
1456790400, 1457395200, 1457827200, 1458086400, 1458172800, 1455580800,
1460073600, 1460419200, 1456617600, 1460073600, 1460851200, 1460073600,
1462233600, 1462320000, 1462492800, 1462665600, 1460073600, 1462579200,
1462492800, 1462579200, 1462838400, 1463443200, 1463702400, 1463616000,
1464912000, 1464825600, 1465171200, 1466035200, 1466121600, 1464912000,
1461888000, 1464652800, 1466467200, 1466553600, 1466640000, 1462579200,
1466726400, 1466985600, 1467331200, 1467331200, 1467590400, 1467590400,
1467936000, 1468108800, 1468281600, 1468368000, 1469145600, 1469404800,
1470009600, 1470009600, 1470009600, 1470441600, 1470787200, 1471219200,
1470096000, 1470268800, 1470873600, 1467590400, 1471564800, 1471478400,
1472256000, 1471478400, 1472515200, 1471219200, 1472256000, 1472601600,
1472860800, 1472688000, 1473292800, 1472947200, 1473638400, 1474243200,
1474156800, 1475193600, 1475193600, 1474761600, 1475193600, 1471046400,
1475193600, 1476316800, 1473724800, 1476748800, 1476403200, 1476748800,
1477785600, 1478044800, 1477958400, 1479168000, 1478304000, 1479254400,
1473811200, 1477699200, 1476576000, 1476576000, 1477872000, 1478476800,
1475193600, 1477094400, 1477094400, 1479859200, 1479340800, 1475884800,
1480896000, 1480464000, 1480982400, 1480982400, 1481241600, 1481846400,
1482192000, 1482451200, 1482537600, 1482624000, 1482969600, 1483228800,
1483401600, 1481846400, 1483747200, 1483920000, 1483488000, 1484438400,
1484956800, 1485216000, 1485388800, 1473292800, 1478995200, 1485216000,
1485216000, 1485907200, 1485907200, 1486339200, 1485216000, 1486512000,
1485216000, 1487116800, 1487030400, 1485388800, 1487721600, 1487808000,
1488153600, 1487289600, 1485129600, 1488240000, 1485129600, 1485388800,
1480896000, 1485129600, 1488412800, 1489104000, 1490054400, 1485216000,
1490054400, 1490400000, 1490572800, 1490659200, 1489622400, 1489881600,
1491436800, 1491523200, 1488412800, 1491782400, 1488758400, 1491868800,
1492473600, 1492646400, 1492387200, 1494633600, 1494288000, 1494288000,
1495152000, 1494201600, 1494979200, 1491868800, 1495065600, 1496102400,
1494979200, 1497052800, 1497052800, 1497225600, 1497657600, 1497744000,
1498435200, 1499212800, 1499904000, 1501372800, 1502409600, 1502582400,
1502668800, 1502236800, 1501718400, 1504569600, 1502841600, 1505174400,
1503878400, 1503964800, 1505260800, 1503964800, 1505606400, 1505865600,
1503964800, 1504656000, 1503878400, 1505520000, 1508716800, 1503964800,
1509580800, 1510704000, 1503964800, 1503964800, 1511481600, 1508889600,
1512518400, 1513987200, 1513555200, 1514764800, 1516665600, 1515456000,
1508889600, 1517097600, 1511654400, 1510012800, 1518393600, 1515456000,
1519257600, 1518825600, 1519344000, 1503964800, 1511654400), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), end_date = structure(c(1431561600,
1434585600, 1436313600, 1437696000, 1438214400, 1438646400, 1438646400,
1439078400, 1439078400, 1438128000, 1438128000, 1439164800, 1438905600,
1439942400, 1438992000, 1440288000, 1440460800, 1440720000, 1440720000,
1438128000, 1440547200, 1441584000, 1441238400, 1441670400, 1439769600,
1443052800, 1444003200, 1439769600, 1441324800, 1444348800, 1444348800,
1444694400, 1444521600, 1444867200, 1445126400, 1445212800, 1446336000,
1445558400, 1447113600, 1447286400, 1447372800, 1447545600, 1447632000,
1442707200, 1447545600, 1448236800, 1444608000, 1448236800, 1449014400,
1449273600, 1449273600, 1449532800, 1449446400, 1445990400, 1450051200,
1450137600, 1450396800, 1451174400, 1452902400, 1452988800, 1453075200,
1453075200, 1454716800, 1455148800, 1455321600, 1455494400, 1455580800,
1455840000, 1454976000, 1457049600, 1457395200, 1457395200, 1457395200,
1457395200, 1457481600, 1457913600, 1458172800, 1458345600, 1458518400,
1460419200, 1460592000, 1457222400, 1460592000, 1460937600, 1461801600,
1462320000, 1462406400, 1462665600, 1462752000, 1462924800, 1462924800,
1463097600, 1463270400, 1463443200, 1464134400, 1464912000, 1464912000,
1465084800, 1465257600, 1465776000, 1466121600, 1466208000, 1466208000,
1463702400, 1466553600, 1466553600, 1466640000, 1466726400, 1466726400,
1466812800, 1467072000, 1467417600, 1467417600, 1467676800, 1467676800,
1468022400, 1468195200, 1468368000, 1468972800, 1469232000, 1469491200,
1470096000, 1470614400, 1470614400, 1470528000, 1470873600, 1471305600,
1471305600, 1470355200, 1471219200, 1471564800, 1471651200, 1471910400,
1472342400, 1472083200, 1472601600, 1472601600, 1472601600, 1472688000,
1473120000, 1473206400, 1473379200, 1473638400, 1473811200, 1474502400,
1474761600, 1475280000, 1475366400, 1475452800, 1475712000, 1472515200,
1475884800, 1476403200, 1475625600, 1476835200, 1477180800, 1477440000,
1477872000, 1478131200, 1478649600, 1479254400, 1479513600, 1479600000,
1476316800, 1478476800, 1477526400, 1477699200, 1478304000, 1478649600,
1476576000, 1477699200, 1480204800, 1480464000, 1480636800, 1476403200,
1480982400, 1480982400, 1481241600, 1481587200, 1481673600, 1481932800,
1482278400, 1482537600, 1482624000, 1482796800, 1483056000, 1483315200,
1483488000, 1483660800, 1483833600, 1484006400, 1484006400, 1484524800,
1485043200, 1485388800, 1485475200, 1474761600, 1480809600, 1485734400,
1485734400, 1485993600, 1485993600, 1486425600, 1486425600, 1486598400,
1486684800, 1487203200, 1487462400, 1487635200, 1487808000, 1487894400,
1488240000, 1488240000, 1488672000, 1488844800, 1488931200, 1488931200,
1485648000, 1489449600, 1489449600, 1489536000, 1490140800, 1490313600,
1490400000, 1490572800, 1490659200, 1490745600, 1491004800, 1489968000,
1491523200, 1491609600, 1491609600, 1491868800, 1491868800, 1492128000,
1492560000, 1492732800, 1492992000, 1494720000, 1494892800, 1494979200,
1495670400, 1495843200, 1495670400, 1495929600, 1495324800, 1496188800,
1496188800, 1497139200, 1497139200, 1497312000, 1497744000, 1497830400,
1498953600, 1499299200, 1499990400, 1501459200, 1502496000, 1502668800,
1502755200, 1503446400, 1503446400, 1504656000, 1503100800, 1505260800,
1505260800, 1505260800, 1505865600, 1506211200, 1506297600, 1506470400,
1506470400, 1507075200, 1507248000, 1508025600, 1509235200, 1509321600,
1509753600, 1510790400, 1510790400, 1510790400, 1511568000, 1511913600,
1513123200, 1514160000, 1514764800, 1516320000, 1516752000, 1516838400,
1516838400, 1517184000, 1517270400, 1518480000, 1518998400, 1519171200,
1519344000, 1519344000, 1519430400, 1519689600, 1519689600), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), duration = structure(c(32, 8, 6, 2,
1, 4, 12, 2, 2, 1, 1, 6, 4, 6, 2, 2, 7, 7, 7, 1, 1, 1, 8, 5,
9, 4, 9, 5, 8, 19, 19, 7, 23, 6, 22, 6, 6, 1, 3, 5, 6, 8, 9,
13, 1, 10, 3, 3, 11, 14, 27, 6, 16, 1, 2, 4, 6, 2, 2, 1, 7, 1,
19, 3, 1, 1, 7, 1, 14, 1, 1, 6, 6, 7, 1, 1, 1, 2, 34, 4, 2, 7,
6, 1, 20, 1, 1, 2, 1, 33, 4, 7, 8, 7, 8, 14, 15, 2, 5, 7, 1,
1, 15, 21, 22, 1, 1, 1, 48, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1,
1, 1, 7, 7, 1, 1, 1, 14, 1, 4, 46, 1, 5, 1, 7, 1, 16, 4, 1, 3,
6, 1, 8, 2, 3, 7, 1, 2, 8, 6, 17, 8, 1, 22, 1, 9, 8, 1, 1, 8,
1, 14, 4, 29, 9, 11, 13, 5, 2, 16, 7, 36, 7, 15, 6, 1, 6, 3,
7, 5, 1, 1, 1, 1, 2, 1, 1, 1, 21, 1, 1, 6, 1, 1, 2, 1, 17, 21,
6, 6, 1, 1, 1, 14, 1, 17, 1, 5, 26, 1, 1, 1, 11, 41, 7, 44, 41,
55, 50, 12, 5, 1, 59, 4, 2, 1, 1, 16, 1, 1, 1, 37, 1, 36, 3,
1, 1, 7, 1, 7, 8, 6, 19, 8, 47, 3, 1, 14, 1, 1, 1, 1, 1, 6, 1,
1, 1, 1, 1, 1, 14, 20, 1, 3, 1, 16, 15, 7, 26, 8, 7, 29, 28,
39, 29, 6, 62, 2, 1, 79, 79, 1, 35, 7, 2, 14, 18, 1, 16, 92,
1, 65, 98, 7, 43, 1, 6, 1, 182, 93), class = "difftime", units = "days")), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -300L))
我现在面临的挑战是,对于duration
天以上的任何n
(现在让我们n == 7
开始),我想将持续时间“拆分”为{ {1}}天的长段。例如,对于n
,sample_id == 2
是32天,因此我想将其分为四个7天细分,再加上一个4天细分。最后,duration
的原始行将变成 5 行,每行分别包含sample_id == 2
,start_date
和end_date
五个部分。我想要一个名为duration
的新列,以标识每个新创建的段,同时保留所有原始列。顺便说一句,对于原始segment_id
比sample_id
短的duration
,我想保持原样,而他们得到n
。
我很困惑。是否有一种相当“整洁”的方式来实现这一目标?预先非常感谢。
答案 0 :(得分:1)
我很抱歉没有使用dplyr
和tidyr
,但是我对data.table
的语法更加流利:
n <- 7L
library(data.table)
setDT(my_durations)[, {
tmp <- unique(c(seq(as.Date(start_date), as.Date(end_date), by = paste(n, "days")),
as.Date(end_date)))
.(segment_id = head(seq_along(tmp), -1L),
segment_start = head(tmp, -1L),
segment_end = tail(tmp, -1L),
segment_duration = diff(tmp))
}, by = .(sample_id, site_id, start_date, end_date, duration)]
sample_id site_id start_date end_date duration segment_id segment_start segment_end segment_duration 1: 2 2 2015-04-12 2015-05-14 32 days 1 2015-04-12 2015-04-19 7 days 2: 2 2 2015-04-12 2015-05-14 32 days 2 2015-04-19 2015-04-26 7 days 3: 2 2 2015-04-12 2015-05-14 32 days 3 2015-04-26 2015-05-03 7 days 4: 2 2 2015-04-12 2015-05-14 32 days 4 2015-05-03 2015-05-10 7 days 5: 2 2 2015-04-12 2015-05-14 32 days 5 2015-05-10 2015-05-14 4 days --- 615: 2838 364 2017-11-26 2018-02-27 93 days 10 2018-01-28 2018-02-04 7 days 616: 2838 364 2017-11-26 2018-02-27 93 days 11 2018-02-04 2018-02-11 7 days 617: 2838 364 2017-11-26 2018-02-27 93 days 12 2018-02-11 2018-02-18 7 days 618: 2838 364 2017-11-26 2018-02-27 93 days 13 2018-02-18 2018-02-25 7 days 619: 2838 364 2017-11-26 2018-02-27 93 days 14 2018-02-25 2018-02-27 2 days
OP要求保留其持续时间短于n
的段,与segment_id == 1
一样。可以使用n <- 50L
验证给定数据集,其中上面的代码将返回
sample_id site_id start_date end_date duration segment_id segment_start segment_end segment_duration 1: 2 2 2015-04-12 2015-05-14 32 days 1 2015-04-12 2015-05-14 32 days 2: 8 13 2015-06-10 2015-06-18 8 days 1 2015-06-10 2015-06-18 8 days 3: 25 12 2015-07-02 2015-07-08 6 days 1 2015-07-02 2015-07-08 6 days 4: 41 23 2015-07-22 2015-07-24 2 days 1 2015-07-22 2015-07-24 2 days 5: 59 47 2015-07-29 2015-07-30 1 days 1 2015-07-29 2015-07-30 1 days --- 308: 2837 350 2017-08-29 2018-02-27 182 days 2 2017-10-18 2017-12-07 50 days 309: 2837 350 2017-08-29 2018-02-27 182 days 3 2017-12-07 2018-01-26 50 days 310: 2837 350 2017-08-29 2018-02-27 182 days 4 2018-01-26 2018-02-27 32 days 311: 2838 364 2017-11-26 2018-02-27 93 days 1 2017-11-26 2018-01-15 50 days 312: 2838 364 2017-11-26 2018-02-27 93 days 2 2018-01-15 2018-02-27 43 days
答案 1 :(得分:1)
潜在的解决方案。请注意,我将duration
列转换为该操作的整数。您可以根据需要将其转换回去。
library(tidyverse)
library(lubridate)
# Define the number of days for one duration
num <- 7L
dat2 <- dat %>%
mutate(duration = as.integer(duration)) %>%
# Count number of duration and how many days left
mutate(times = duration %/% num, left = duration %% num) %>%
# Repeat each row based on number of duration
slice(rep(row_number(), times = times + 1)) %>%
group_by(sample_id, site_id) %>%
# Process the start and end date
mutate(seg_start_date = if_else(row_number() < n(),
start_date + days(num * (n() - 1)),
end_date - days(left)),
seg_end_date = if_else(row_number() < n(),
seg_start_date + days(num - 1),
end_date)) %>%
# Create segment id
mutate(segment_id = 1:n()) %>%
# Create segment duration
mutate(seg_duration = ifelse(row_number() == n(), left, num)) %>%
ungroup() %>%
select(-times, -left)
head(dat2) %>% as.data.frame()
# sample_id site_id start_date end_date duration seg_start_date seg_end_date segment_id seg_duration
# 1 2 2 2015-04-12 2015-05-14 32 2015-04-12 2015-04-18 1 7
# 2 2 2 2015-04-12 2015-05-14 32 2015-04-19 2015-04-25 2 7
# 3 2 2 2015-04-12 2015-05-14 32 2015-04-26 2015-05-02 3 7
# 4 2 2 2015-04-12 2015-05-14 32 2015-05-03 2015-05-09 4 7
# 5 2 2 2015-04-12 2015-05-14 32 2015-05-10 2015-05-14 5 4
# 6 8 13 2015-06-10 2015-06-18 8 2015-06-10 2015-06-16 1 7
答案 2 :(得分:1)
我们可以创建列表的列,并unnest
:
library(tidyverse)
library(lubridate)
df %>% group_by(sample_id,site_id) %>%
mutate(duration_new = (as.numeric(duration)-1) %>% seq(0,.,by=7) %>% c(duration) %>% diff %>% list,
start_date_new = list(start_date + days(c(0,cumsum(head(duration_new[[1]],-1))))),
end_date_new = list(start_date + days(cumsum(duration_new[[1]]))),
segment_id = list(seq_along(duration_new[[1]]))) %>%
unnest %>%
ungroup
# # A tibble: 619 x 9
# sample_id site_id start_date end_date duration duration_new start_date_new end_date_new segment_id
# <int> <int> <dttm> <dttm> <time> <dbl> <dttm> <dttm> <int>
# 1 2 2 2015-04-12 00:00:00 2015-05-14 00:00:00 32 7 2015-04-12 00:00:00 2015-04-19 00:00:00 1
# 2 2 2 2015-04-12 00:00:00 2015-05-14 00:00:00 32 7 2015-04-19 00:00:00 2015-04-26 00:00:00 2
# 3 2 2 2015-04-12 00:00:00 2015-05-14 00:00:00 32 7 2015-04-26 00:00:00 2015-05-03 00:00:00 3
# 4 2 2 2015-04-12 00:00:00 2015-05-14 00:00:00 32 7 2015-05-03 00:00:00 2015-05-10 00:00:00 4
# 5 2 2 2015-04-12 00:00:00 2015-05-14 00:00:00 32 4 2015-05-10 00:00:00 2015-05-14 00:00:00 5
# 6 8 13 2015-06-10 00:00:00 2015-06-18 00:00:00 8 7 2015-06-10 00:00:00 2015-06-17 00:00:00 1
# 7 8 13 2015-06-10 00:00:00 2015-06-18 00:00:00 8 1 2015-06-17 00:00:00 2015-06-18 00:00:00 2
# 8 25 12 2015-07-02 00:00:00 2015-07-08 00:00:00 6 6 2015-07-02 00:00:00 2015-07-08 00:00:00 1
# 9 41 23 2015-07-22 00:00:00 2015-07-24 00:00:00 2 2 2015-07-22 00:00:00 2015-07-24 00:00:00 1
# 10 59 47 2015-07-29 00:00:00 2015-07-30 00:00:00 1 1 2015-07-29 00:00:00 2015-07-30 00:00:00 1
# # ... with 609 more rows
嵌套的工作方式
在mutate
调用之后,data.frame
的行数与以前一样,但是有新的列。这些新列都包含列表元素(即列表元素),我们称它们为列表列。
然后unnest
垂直开发这些列表,我们得到了更多的行,只是重复了不是列表列的列中的元素。
from ?unnest
:如果您有一个列表列,这会使列表的每个元素成为自己的行。
duration_new
的构建方式
我们使用seq
(请参阅?seq
)进行一个简单的中断序列,但是seq没有给出最后一个元素(这是完整的持续时间),因此我们将其与{{1}相加}。中断的c
给出了各个持续时间。
您可以逐步执行此操作(选择直到在管道之前输入ctrl + enter):
diff
duration <- 32
(as.numeric(duration)-1) %>% seq(0,.,by=7) %>% c(duration) %>% diff %>% list
和start_date_new
的构建方式
我们从start_date开始并添加累积的持续时间,对于end_date_new
,我们将start_date_new
添加到第一个元素,因此我们将它们保持偏移。