创建双向表以在R中运行t检验

时间:2019-03-28 15:52:57

标签: r

我正在学习R,需要完成一项以我目前的知识水平来说相当困难的分析,因此在此寻求帮助。我创建了下面的表格,其中显示了不同日期在一天24小时中每个小时的平均活动量:

> dput(act.byHour)
structure(list(hour = 0:23, `Activity on 6/20/2018` = c(59L, 
74L, 2683L, 4341L, 3676L, 2143L, 3890L, 3887L, 1299L, 1492L, 
3449L, 2200L, 1563L, 4346L, 5329L, 3037L, 1462L, 668L, 383L, 
483L, 288L, 2765L, 3354L, 1783L), `Activity on 6/21/2018` = c(241L, 
301L, 261L, 3683L, 4356L, 3736L, 2810L, 1841L, 3146L, 609L, 2998L, 
4059L, 3690L, 3735L, 1343L, 2087L, 894L, 341L, 240L, 2113L, 1684L, 
3115L, 2890L, 138L), `Activity on 6/22/2018` = c(21L, 451L, 96L, 
2918L, 2279L, 2282L, 4992L, 698L, 427L, 581L, 1248L, 2184L, 1980L, 
2364L, 568L, 2477L, 525L, 433L, 974L, 501L, 760L, 67L, 297L, 
1198L), `Activity on 6/23/2018` = c(2L, 39L, 42L, 1182L, 1749L, 
2144L, 3123L, 1170L, 1641L, 1112L, 1526L, 1199L, 534L, 1481L, 
2388L, 2756L, 392L, 112L, 390L, 107L, 709L, 1122L, 1562L, 451L
), `Activity on 6/24/2018` = c(8L, 74L, 0L, 158L, 780L, 3118L, 
3292L, 2759L, 3121L, 2051L, 2387L, 900L, 627L, 904L, 4283L, 3726L, 
1273L, 977L, 326L, 163L, 1915L, 1073L, 1021L, 545L), `Activity on 6/25/2018` = c(36L, 
22L, 3L, 55L, 124L, 22L, 4093L, 2867L, 3649L, 2550L, 1590L, 636L, 
2571L, 998L, 1066L, 2967L, 1211L, 51L, 1188L, 1413L, 714L, 177L, 
132L, 29L), `Activity on 6/26/2018` = c(22L, 43L, 0L, 90L, 1094L, 
1655L, 2643L, 2108L, 2249L, 2453L, 2857L, 915L, 437L, 1142L, 
2193L, 2993L, 1139L, 1549L, 652L, 580L, 970L, 674L, 211L, 206L
), `Activity on 6/27/2018` = c(167L, 63L, 1L, 786L, 617L, 1575L, 
2237L, 1302L, 1149L, 2009L, 2234L, 1263L, 1259L, 2017L, 1641L, 
2683L, 1184L, 449L, 65L, 956L, 1538L, 1287L, 593L, 362L), `Activity on 6/28/2018` = c(594L, 
1172L, 25L, 445L, 921L, 1812L, 2235L, 1153L, 422L, 1084L, 2158L, 
1610L, 845L, 1187L, 2528L, 2161L, 976L, 19L, 747L, 570L, 576L, 
19L, 304L, 2L), `Activity on 6/29/2018` = c(301L, 7L, 399L, 494L, 
723L, 1088L, 771L, 85L, 1338L, 866L, 384L, 1356L, 2862L, 3805L, 
2142L, 1655L, 249L, 235L, 3L, 0L, 283L, 981L, 634L, 1370L), `Activity on 6/30/2018` = c(9L, 
137L, 33L, 975L, 1690L, 1639L, 985L, 210L, 1266L, 2135L, 2080L, 
1704L, 2449L, 3133L, 1055L, 3222L, 1152L, 173L, 858L, 188L, 700L, 
330L, 905L, 1232L), `Activity on 7/1/2018` = c(1006L, 5L, 21L, 
520L, 1162L, 1771L, 2463L, 1403L, 1353L, 1938L, 2388L, 4133L, 
900L, 2660L, 3504L, 3946L, 1956L, 818L, 604L, 937L, 373L, 48L, 
400L, 201L), `Activity on 7/10/2018` = c(705L, 47L, 605L, 257L, 
1359L, 41L, 1019L, 1426L, 2219L, 1179L, 1624L, 537L, 421L, 1747L, 
2941L, 2921L, 1046L, 283L, 476L, 218L, 59L, 389L, 657L, 1293L
), `Activity on 7/11/2018` = c(24L, 455L, 6L, 1232L, 2264L, 1152L, 
600L, 11L, 980L, 1519L, 2004L, 1933L, 2161L, 1386L, 1883L, 2978L, 
1385L, 104L, 1309L, 2L, 364L, 550L, 0L, 1433L), `Activity on 7/12/2018` = c(1634L, 
27L, 860L, 1095L, 1102L, 132L, 582L, 710L, 1368L, 2470L, 2944L, 
1030L, 1286L, 387L, 2590L, 2449L, 743L, 134L, 274L, 205L, 360L, 
627L, 1357L, 591L), `Activity on 7/13/2018` = c(216L, 143L, 70L, 
2L, 477L, 42L, 81L, 304L, 2827L, 2437L, 2002L, 688L, 935L, 812L, 
404L, 1098L, 1157L, 857L, 466L, 215L, 714L, 269L, 1223L, 8L), 
    `Activity on 7/14/2018` = c(1L, 635L, 6L, 1797L, 1363L, 246L, 
    704L, 1089L, 943L, 2251L, 813L, 2643L, 1657L, 18L, 1132L, 
    2884L, 1044L, 149L, 1146L, 68L, 1227L, 1189L, 129L, 1291L
    ), `Activity on 7/15/2018` = c(7L, 9L, 1299L, 389L, 288L, 
    157L, 0L, 324L, 248L, 915L, 795L, 598L, 733L, 308L, 2760L, 
    2874L, 1903L, 499L, 73L, 31L, 1146L, 920L, 852L, 2L), `Activity on 7/16/2018` = c(104L, 
    564L, 16L, 1903L, 675L, 1859L, 720L, 1017L, 4L, 2114L, 2264L, 
    1152L, 935L, 1691L, 1031L, 2568L, 2035L, 226L, 18L, 1716L, 
    249L, 717L, 635L, 919L), `Activity on 7/17/2018` = c(1436L, 
    16L, 17L, 1891L, 1175L, 74L, 435L, 377L, 718L, 619L, 439L, 
    1373L, 2154L, 2481L, 763L, 2084L, 910L, 641L, 669L, 737L, 
    793L, 1471L, 12L, 96L), `Activity on 7/18/2018` = c(6L, 13L, 
    81L, 1227L, 1685L, 260L, 238L, 575L, 930L, 330L, 1139L, 785L, 
    1110L, 1007L, 1770L, 2824L, 729L, 776L, 602L, 550L, 1432L, 
    567L, 197L, 107L), `Activity on 7/19/2018` = c(38L, 648L, 
    264L, 911L, 2239L, 1063L, 9L, 1336L, 1235L, 628L, 1722L, 
    1028L, 1393L, 44L, 2110L, 1719L, 666L, 127L, 885L, 788L, 
    1274L, 765L, 1094L, 38L), `Activity on 7/2/2018` = c(876L, 
    505L, 162L, 775L, 1567L, 896L, 1648L, 995L, 2574L, 1080L, 
    997L, 1881L, 1375L, 1283L, 2156L, 2384L, 982L, 33L, 20L, 
    761L, 241L, 696L, 133L, 915L), `Activity on 7/20/2018` = c(514L, 
    14L, 59L, 1081L, 1266L, 359L, 1055L, 280L, 123L, 2251L, 2302L, 
    1116L, 2750L, 764L, 1377L, 2776L, 970L, 814L, 10L, 1364L, 
    1137L, 279L, 10L, 605L), `Activity on 7/21/2018` = c(279L, 
    596L, 12L, 1443L, 1463L, 1426L, 132L, 924L, 379L, 693L, 137L, 
    219L, 884L, 194L, 450L, 1204L, 487L, 578L, 445L, 9L, 823L, 
    2L, 1212L, 12L), `Activity on 7/22/2018` = c(200L, 9L, 152L, 
    1062L, 1926L, 1156L, 1951L, 1735L, 753L, 570L, 362L, 813L, 
    756L, 1403L, 308L, 1895L, 325L, 768L, 666L, 33L, 634L, 1294L, 
    819L, 39L), `Activity on 7/23/2018` = c(579L, 8L, 657L, 438L, 
    521L, 896L, 2560L, 1383L, 819L, 1293L, 2257L, 476L, 1850L, 
    759L, 2482L, 1513L, 789L, 78L, 329L, 43L, 50L, 1583L, 342L, 
    0L), `Activity on 7/24/2018` = c(0L, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA), `Activity on 7/3/2018` = c(623L, 918L, 48L, 
    523L, 721L, 1624L, 1047L, 1783L, 313L, 1042L, 2211L, 2430L, 
    1770L, 1610L, 2814L, 2460L, 1770L, 25L, 709L, 416L, 709L, 
    998L, 921L, 89L), `Activity on 7/4/2018` = c(258L, 1242L, 
    75L, 1131L, 893L, 402L, 381L, 51L, 15L, 47L, 762L, 777L, 
    479L, 2416L, 3639L, 1991L, 202L, 1054L, 917L, 1565L, 503L, 
    61L, 44L, 2103L), `Activity on 7/5/2018` = c(2212L, 352L, 
    1L, 666L, 351L, 1321L, 7L, 1010L, 1222L, 1080L, 1643L, 1101L, 
    188L, 2793L, 1548L, 1811L, 1807L, 51L, 788L, 1108L, 1157L, 
    1038L, 225L, 454L), `Activity on 7/6/2018` = c(441L, 376L, 
    444L, 5L, 501L, 579L, 1253L, 1600L, 1051L, 498L, 2217L, 2362L, 
    2425L, 1220L, 2037L, 2684L, 799L, 471L, 139L, 545L, 1117L, 
    177L, 487L, 1420L), `Activity on 7/7/2018` = c(692L, 303L, 
    736L, 750L, 1386L, 926L, 30L, 862L, 1912L, 2731L, 1123L, 
    1160L, 2892L, 1634L, 585L, 3473L, 2243L, 441L, 399L, 1482L, 
    111L, 455L, 1315L, 691L), `Activity on 7/8/2018` = c(1428L, 
    96L, 52L, 258L, 1135L, 1727L, 448L, 2148L, 358L, 2180L, 1519L, 
    2634L, 828L, 1212L, 1052L, 2851L, 902L, 171L, 236L, 3L, 727L, 
    1366L, 637L, 43L), `Activity on 7/9/2018` = c(0L, 1320L, 
    146L, 664L, 862L, 663L, 227L, 227L, 995L, 743L, 1793L, 2421L, 
    1346L, 1874L, 2182L, 1333L, 1967L, 1023L, 297L, 340L, 1469L, 
    10L, 213L, 805L)), row.names = c(NA, -24L), class = "data.frame")

我还有另一个看起来像这样的数据集:

> head(datanet)
   Date & Time [Local]    meters
1:    18/06/2018 03:08 15.959366
2:    18/06/2018 03:12 22.535566
3:    18/06/2018 03:16 12.036834
4:    18/06/2018 03:20 18.738134
5:    18/06/2018 03:24 26.781879
6:    18/06/2018 03:28  8.341659

meters列中标记了行进距离(以米为单位)。

我想创建一个表dist.byHour,其结构和格式与act.byHour完全相同,但其中的值是不同日期在一天24小时中每个小时的平均行驶距离(而非活动量)。

能否帮我创建这个双向表?

PS:需要完成这种数据转换,以便对速度(列meters)与活动之间的相关性进行分析。

用于执行此类分析的值将从表dist.byHour和act.byHour中获取。我将使用t检验,所以我希望两个表都具有易于在R中运行检验的格式。

感谢您的帮助!

1 个答案:

答案 0 :(得分:1)

按日期和时间聚合数据的关键是方法cut.POSIXt。它会创建一个额外的日期/小时列,然后将其用作汇总因子。

library(tidyverse)

# Build `dist.byHour`: the mean of `meters` for every hour of every day,
# reshaped into a two-way table with one row per hour of the day and one
# column per calendar date.

# Address the timestamp column by name rather than by position. When it was
# read in as text in the "dd/mm/yyyy HH:MM" layout, parse it first: cut() only
# dispatches to its date-time method for POSIXt input.
stamp <- datanet[["Date & Time [Local]"]]
if (!inherits(stamp, "POSIXt")) {
  stamp <- as.POSIXct(as.character(stamp), format = "%d/%m/%Y %H:%M")
}

# cut.POSIXt() assigns every timestamp to the hour it falls in; the resulting
# factor is the grouping key for the hourly means.
datanet$datehour <- cut(stamp, breaks = "hours")

dist.byHour <- aggregate(meters ~ datehour, datanet, mean, na.rm = TRUE)

# Derive the hour AND the calendar date through format(), so both are read off
# the same wall-clock time. Calling as.Date() directly on a POSIXct converts
# in UTC by default, which can file hours near midnight under the wrong day.
hour_start <- as.POSIXct(dist.byHour$datehour)
dist.byHour$hour <- format(hour_start, "%H")
dist.byHour$datehour <- as.Date(format(hour_start, "%Y-%m-%d"))
dist.byHour <- dist.byHour[c("hour", "datehour", "meters")]

# Long -> wide. The value column is named explicitly; rows are ordered by hour
# and columns by date, and the result is kept as a plain data.frame.
dist.byHour <- dist.byHour %>%
  pivot_wider(
    names_from = datehour,
    values_from = meters,
    names_sort = TRUE
  ) %>%
  arrange(hour) %>%
  as.data.frame()

names(dist.byHour)[-1] <- paste("Activity on", names(dist.byHour)[-1])

head(dist.byHour[1:3])
# NOTE(review): the listing below is the output as originally posted, before
# dates were taken in local time. Its row 00 for 2018-06-18 is populated even
# though the sample data start at 03:08 that day, which looks like a midnight
# bin filed under its UTC date; with the code above that cell should be NA.
# Exact values also depend on the RNG of the R version used. Re-run to refresh.
#  hour Activity on 2018-06-18 Activity on 2018-06-19
#1   00              25.834355              29.388140
#2   01                     NA               8.329956
#3   02                     NA              31.506390
#4   03              33.464954              20.995957
#5   04               6.406513              17.035749
#6   05              28.254438              38.803171

数据创建代码。

# Synthetic stand-in for `datanet`: 1000 timestamps separated by random gaps
# of 0-59 minutes, each paired with a uniform random distance in metres.
set.seed(1234)    # Fixed seed so the sample is reproducible

n_obs <- 1e3
gap_minutes <- sample(0:59, size = n_obs, replace = TRUE)

# Timestamps are the running total of the gaps, counted from the first reading.
datetime <- sort(
  as.POSIXct("2018-06-18 03:08") + lubridate::minutes(cumsum(gap_minutes))
)

datanet <- data.frame(datetime, meters = runif(n_obs, min = 1, max = 50))

# Give the timestamp column the same header as in the question's data.
names(datanet)[1] <- "Date & Time [Local]"