我有两个数据框:一个是间隔不均匀的每日计数数据(名为 `y`),另一个是间隔均匀的每周数据(名为 `gIm`,其中有两个表示日期的变量:`weekStart` 和 `weekEnd`)。我想把每周 `weekStart` 与 `weekEnd` 之间的所有每日观测计数加总,并把得到的计数向量作为新列追加到每周数据框中。下面是我目前的尝试:
# Order the daily observations chronologically (the sort key is the date,
# not the week).
y <- y[order(as.Date(y$date, format = "%Y/%m/%d")), ]

# Week boundaries, kept row-aligned with gIm. unique() is deliberately not
# applied: dropping duplicates would shift the bounds relative to gIm's rows.
start <- gIm$weekStart
end <- gIm$weekEnd

# For every week (row of gIm), add up the daily counts whose date lies in the
# closed interval [weekStart, weekEnd]. Weeks with no observations get 0.
# The result is written at the week index, one value per row of gIm.
# seq_len() keeps this safe when gIm has zero rows.
gIm$count <- vapply(
  seq_len(nrow(gIm)),
  function(w) {
    inWeek <- y$date >= start[w] & y$date <= end[w]
    sum(y$count[inWeek], na.rm = TRUE)
  },
  numeric(1)
)
这是我的间隔不均匀的每日数据(抱歉篇幅较长):
structure(list(date = structure(c(12437, 12478, 12486, 12487,
12493, 12494, 12495, 12500, 12502, 12506, 12900, 12955, 12962,
12964, 12977, 12982, 12983, 12985, 12991, 12992, 12993, 13032,
13033, 13034, 13041, 13046, 13048, 13053, 13055, 13063, 13073,
13074, 13075, 13082, 13083, 13084, 13094, 13096, 13097, 13101,
13103, 13104, 13105, 13123, 13124, 13125, 13130, 13133, 13209,
13214, 13235, 13242, 13244, 13263, 13272, 13277, 13285, 13291,
13293, 13305, 13306, 13311, 13312, 13314, 13320, 13328, 13339,
13342, 13346, 13354, 13356, 13357, 13405, 13406, 13410, 13419,
13420, 13489, 13517, 13518, 13522, 13523, 13525, 13530, 13531,
13535, 13542, 13543, 13544, 13550, 13551, 13552, 13559, 13560,
13572, 13573, 13577, 13578, 13579, 13580, 13581, 13585, 13587,
13592, 13593, 13594, 13600, 13601, 13620, 13621, 13622, 13626,
13641, 13643, 13647, 13650, 13654, 13657, 13686, 13692, 13704,
13711, 13717, 13718, 13720, 13726, 14569, 14629, 14630, 14637,
14642, 14644, 14664, 14672, 14677, 14683, 14713, 14727, 14736,
14272, 14782, 14789, 14805, 14816, 14825, 14866, 14874, 14880,
14881, 14930, 14943, 14287, 14314, 14329, 14336, 14250, 14357,
14362, 14369, 14370), class = "Date"), count = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 3L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 3L, 1L, 3L, 1L, 1L,
1L, 2L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 2L, 1L, 1L, 1L, 1L, 3L, 1L,
1L, 1L, 1L, 3L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 12L, 2L, 1L, 1L,
1L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 3L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 3L, 2L, 1L, 3L, 1L, 2L, 2L,
2L, 1L, 3L, 3L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 4L, 2L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L)), .Names = c("date",
"count"), row.names = c(NA, -160L), class = "data.frame")
这是我的每周间隔数据(同样抱歉篇幅较长):
structure(list(immigration = c(62L, 53L, 47L, 47L, 46L, 46L,
47L, 49L, 49L, 43L, 47L, 41L, 46L, 44L, 41L, 45L, 52L, 49L, 47L,
41L, 41L, 37L, 37L, 36L, 37L, 36L, 37L, 38L, 36L, 34L, 33L, 34L,
32L, 35L, 34L, 38L, 40L, 43L, 43L, 42L, 42L, 41L, 42L, 48L, 46L,
47L, 40L, 48L, 44L, 42L, 30L, 32L, 41L, 37L, 37L, 39L, 39L, 43L,
39L, 39L, 42L, 41L, 41L, 37L, 39L, 37L, 40L, 40L, 41L, 41L, 41L,
39L, 38L, 35L, 36L, 33L, 31L, 33L, 32L, 32L, 33L, 32L, 31L, 31L,
33L, 33L, 29L, 32L, 38L, 37L, 36L, 38L, 39L, 41L, 39L, 38L, 39L,
38L, 31L, 42L, 39L, 37L, 30L, 27L, 33L, 36L, 33L, 35L, 36L, 36L,
35L, 34L, 39L, 42L, 41L, 44L, 93L, 83L, 91L, 70L, 81L, 100L,
64L, 78L, 72L, 54L, 48L, 40L, 36L, 33L, 33L, 34L, 34L, 34L, 31L,
31L, 33L, 32L, 31L, 33L, 38L, 38L, 41L, 40L, 39L, 41L, 41L, 43L,
43L, 45L, 35L, 43L, 41L, 39L, 29L, 26L, 32L, 38L, 34L, 39L, 39L,
39L, 39L, 39L, 39L, 42L, 42L, 43L, 42L, 43L, 44L, 41L, 43L, 52L,
45L, 63L, 64L, 53L, 60L, 57L, 51L, 65L, 44L, 39L, 41L, 38L, 31L,
30L, 29L, 30L, 31L, 31L, 33L, 35L, 36L, 36L, 37L, 36L, 36L, 38L,
38L, 39L, 31L, 40L, 39L, 36L, 29L, 21L, 27L, 35L, 33L, 32L, 34L,
36L, 35L, 32L, 35L, 33L, 34L, 31L, 31L, 33L, 34L, 34L, 33L, 33L,
32L, 31L, 29L, 25L, 27L, 24L, 24L, 23L, 22L, 23L, 23L, 23L, 22L,
22L, 21L, 21L, 24L, 23L, 27L, 28L, 29L, 29L, 29L, 30L, 31L, 31L,
30L, 30L, 30L, 23L, 29L, 27L, 23L, 16L, 17L, 24L, 26L, 26L, 27L,
28L, 29L, 27L, 29L, 29L, 29L, 28L, 29L, 29L, 29L, 30L, 30L, 29L,
29L, 28L, 25L, 25L, 25L, 25L, 24L, 24L, 23L, 23L, 23L, 22L, 23L,
22L, 22L, 21L, 22L, 22L, 23L, 25L, 25L, 26L, 27L, 26L, 27L, 26L,
27L, 26L, 28L, 21L, 26L, 25L, 24L, 18L, 17L, 24L, 26L, 25L, 25L,
25L, 24L, 24L, 25L, 26L, 28L, 27L, 32L, 26L, 27L, 29L, 40L, 87L,
65L, 49L, 57L, 40L, 33L, 30L, 28L, 28L, 29L, 30L, 29L, 26L, 36L,
26L, 23L, 21L, 21L, 23L, 22L, 24L, 27L, 25L, 26L, 24L, 25L, 26L,
27L, 24L, 27L, 19L, 24L, 25L, 21L, 15L, 14L), weekStart = structure(c(12421,
12428, 12435, 12442, 12449, 12456, 12463, 12470, 12477, 12484,
12491, 12498, 12505, 12512, 12519, 12526, 12533, 12540, 12547,
12554, 12561, 12568, 12575, 12582, 12589, 12596, 12603, 12610,
12617, 12624, 12631, 12638, 12645, 12652, 12659, 12666, 12673,
12680, 12687, 12694, 12701, 12708, 12715, 12722, 12729, 12736,
12743, 12750, 12757, 12764, 12771, 12778, 12785, 12792, 12799,
12806, 12813, 12820, 12827, 12834, 12841, 12848, 12855, 12862,
12869, 12876, 12883, 12890, 12897, 12904, 12911, 12918, 12925,
12932, 12939, 12946, 12953, 12960, 12967, 12974, 12981, 12988,
12995, 13002, 13009, 13016, 13023, 13030, 13037, 13044, 13051,
13058, 13065, 13072, 13079, 13086, 13093, 13100, 13107, 13114,
13121, 13128, 13135, 13142, 13149, 13156, 13163, 13170, 13177,
13184, 13191, 13198, 13205, 13212, 13219, 13226, 13233, 13240,
13247, 13254, 13261, 13268, 13275, 13282, 13289, 13296, 13303,
13310, 13317, 13324, 13331, 13338, 13345, 13352, 13359, 13366,
13373, 13380, 13387, 13394, 13401, 13408, 13415, 13422, 13429,
13436, 13443, 13450, 13457, 13464, 13471, 13478, 13485, 13492,
13499, 13506, 13513, 13520, 13527, 13534, 13541, 13548, 13555,
13562, 13569, 13576, 13583, 13590, 13597, 13604, 13611, 13618,
13625, 13632, 13639, 13646, 13653, 13660, 13667, 13674, 13681,
13688, 13695, 13702, 13709, 13716, 13723, 13730, 13737, 13744,
13751, 13758, 13765, 13772, 13779, 13786, 13793, 13800, 13807,
13814, 13821, 13828, 13835, 13842, 13849, 13856, 13863, 13870,
13877, 13884, 13891, 13898, 13905, 13912, 13919, 13926, 13933,
13940, 13947, 13954, 13961, 13968, 13975, 13982, 13989, 13996,
14003, 14010, 14017, 14024, 14031, 14038, 14045, 14052, 14059,
14066, 14073, 14080, 14087, 14094, 14101, 14108, 14115, 14122,
14129, 14136, 14143, 14150, 14157, 14164, 14171, 14178, 14185,
14192, 14199, 14206, 14213, 14220, 14227, 14234, 14241, 14248,
14255, 14262, 14269, 14276, 14283, 14290, 14297, 14304, 14311,
14318, 14325, 14332, 14339, 14346, 14353, 14360, 14367, 14374,
14381, 14388, 14395, 14402, 14409, 14416, 14423, 14430, 14437,
14444, 14451, 14458, 14465, 14472, 14479, 14486, 14493, 14500,
14507, 14514, 14521, 14528, 14535, 14542, 14549, 14556, 14563,
14570, 14577, 14584, 14591, 14598, 14605, 14612, 14619, 14626,
14633, 14640, 14647, 14654, 14661, 14668, 14675, 14682, 14689,
14696, 14703, 14710, 14717, 14724, 14731, 14738, 14745, 14752,
14759, 14766, 14773, 14780, 14787, 14794, 14801, 14808, 14815,
14822, 14829, 14836, 14843, 14850, 14857, 14864, 14871, 14878,
14885, 14892, 14899, 14906, 14913, 14920, 14927, 14934, 14941,
14948, 14955, 14962, 14969), class = "Date"), weekEnd = structure(c(12427,
12434, 12441, 12448, 12455, 12462, 12469, 12476, 12483, 12490,
12497, 12504, 12511, 12518, 12525, 12532, 12539, 12546, 12553,
12560, 12567, 12574, 12581, 12588, 12595, 12602, 12609, 12616,
12623, 12630, 12637, 12644, 12651, 12658, 12665, 12672, 12679,
12686, 12693, 12700, 12707, 12714, 12721, 12728, 12735, 12742,
12749, 12756, 12763, 12770, 12777, 12784, 12791, 12798, 12805,
12812, 12819, 12826, 12833, 12840, 12847, 12854, 12861, 12868,
12875, 12882, 12889, 12896, 12903, 12910, 12917, 12924, 12931,
12938, 12945, 12952, 12959, 12966, 12973, 12980, 12987, 12994,
13001, 13008, 13015, 13022, 13029, 13036, 13043, 13050, 13057,
13064, 13071, 13078, 13085, 13092, 13099, 13106, 13113, 13120,
13127, 13134, 13141, 13148, 13155, 13162, 13169, 13176, 13183,
13190, 13197, 13204, 13211, 13218, 13225, 13232, 13239, 13246,
13253, 13260, 13267, 13274, 13281, 13288, 13295, 13302, 13309,
13316, 13323, 13330, 13337, 13344, 13351, 13358, 13365, 13372,
13379, 13386, 13393, 13400, 13407, 13414, 13421, 13428, 13435,
13442, 13449, 13456, 13463, 13470, 13477, 13484, 13491, 13498,
13505, 13512, 13519, 13526, 13533, 13540, 13547, 13554, 13561,
13568, 13575, 13582, 13589, 13596, 13603, 13610, 13617, 13624,
13631, 13638, 13645, 13652, 13659, 13666, 13673, 13680, 13687,
13694, 13701, 13708, 13715, 13722, 13729, 13736, 13743, 13750,
13757, 13764, 13771, 13778, 13785, 13792, 13799, 13806, 13813,
13820, 13827, 13834, 13841, 13848, 13855, 13862, 13869, 13876,
13883, 13890, 13897, 13904, 13911, 13918, 13925, 13932, 13939,
13946, 13953, 13960, 13967, 13974, 13981, 13988, 13995, 14002,
14009, 14016, 14023, 14030, 14037, 14044, 14051, 14058, 14065,
14072, 14079, 14086, 14093, 14100, 14107, 14114, 14121, 14128,
14135, 14142, 14149, 14156, 14163, 14170, 14177, 14184, 14191,
14198, 14205, 14212, 14219, 14226, 14233, 14240, 14247, 14254,
14261, 14268, 14275, 14282, 14289, 14296, 14303, 14310, 14317,
14324, 14331, 14338, 14345, 14352, 14359, 14366, 14373, 14380,
14387, 14394, 14401, 14408, 14415, 14422, 14429, 14436, 14443,
14450, 14457, 14464, 14471, 14478, 14485, 14492, 14499, 14506,
14513, 14520, 14527, 14534, 14541, 14548, 14555, 14562, 14569,
14576, 14583, 14590, 14597, 14604, 14611, 14618, 14625, 14632,
14639, 14646, 14653, 14660, 14667, 14674, 14681, 14688, 14695,
14702, 14709, 14716, 14723, 14730, 14737, 14744, 14751, 14758,
14765, 14772, 14779, 14786, 14793, 14800, 14807, 14814, 14821,
14828, 14835, 14842, 14849, 14856, 14863, 14870, 14877, 14884,
14891, 14898, 14905, 14912, 14919, 14926, 14933, 14940, 14947,
14954, 14961, 14968, 14975), class = "Date")), .Names = c("immigration",
"weekStart", "weekEnd"), class = "data.frame", row.names = c(NA,
-365L))
感谢您的帮助!
答案 0 :(得分:3)
先求出 `y` 中每个日期所在周的起始日(周日):
# Step each date back by its weekday number (0 = Sunday, ..., 6 = Saturday)
# so that it lands on the Sunday that opens its week.
y$weekStart <- y$date - as.integer(format(y$date, "%w"))
然后按周起始日汇总计数(结果中省略了此时已不再需要的 `date` 列):
# Sum `count` over all rows of y that share the same weekStart, giving one
# row per distinct weekStart with columns weekStart and count.
yy <- aggregate(count ~ weekStart, data=y, FUN=sum)
最后,将结果与 `gIm` 合并:
# Attach the weekly totals to gIm. The join column is named explicitly so the
# merge does not silently also join on any other column the two frames share
# (e.g. a `count` column already added to gIm). all.x = TRUE keeps every week
# of gIm exactly once (count is NA for weeks without observations) and never
# adds rows for weeks that exist only in yy.
m <- merge(gIm, yy, by = "weekStart", all.x = TRUE)
> head(m, 10)
weekStart immigration weekEnd count
1 2004-01-04 62 2004-01-10 NA
2 2004-01-11 53 2004-01-17 NA
3 2004-01-18 47 2004-01-24 1
4 2004-01-25 47 2004-01-31 NA
5 2004-02-01 46 2004-02-07 NA
6 2004-02-08 46 2004-02-14 NA
7 2004-02-15 47 2004-02-21 NA
8 2004-02-22 49 2004-02-28 NA
9 2004-02-29 49 2004-03-06 1
10 2004-03-07 43 2004-03-13 2
答案 1 :(得分:2)
这是一个使用 data.table 的方案。
先按相应的日期列为两个数据集设置键(key);
然后,我们可以“即时”(在 `j` 中)展开每周所包含的全部日期,并将两者合并。
library(data.table)

# Keyed copies of the weekly and the daily data.
gdt <- data.table(gIm, key = "weekStart")
ydt <- data.table(y, key = "date")

# Expand every week into its seven calendar days, remembering which week each
# day belongs to, and key the expanded table by day.
weekDays <- gdt[, list(date = seq(weekStart, weekEnd, length.out = 7)),
                by = weekStart]
setkey(weekDays, "date")

# Look up the daily counts for every expanded day (days without an
# observation get NA), then total them per week, ignoring the NAs.
weeklyCounts <- ydt[weekDays][
  , list(totalCounts = sum(count, na.rm = TRUE)), by = "weekStart"]

# Add the weekly totals to gdt by reference, matching on weekStart.
setkey(weeklyCounts, weekStart)
gdt[weeklyCounts, totalCounts := totalCounts]
gdt
immigration weekStart weekEnd totalCounts
1: 62 2004-01-04 2004-01-10 0
2: 53 2004-01-11 2004-01-17 0
3: 47 2004-01-18 2004-01-24 1
4: 47 2004-01-25 2004-01-31 0
5: 46 2004-02-01 2004-02-07 0
---
361: 24 2010-11-28 2010-12-04 1
362: 25 2010-12-05 2010-12-11 0
363: 21 2010-12-12 2010-12-18 0
364: 15 2010-12-19 2010-12-25 0
365: 14 2010-12-26 2011-01-01 0
答案 2 :(得分:1)
这是使用 data.table 和滚动连接(rolling join)的一种方法:
# Key the weekly table on weekEnd and the daily table on date, so the join
# below matches each daily date against the week-ending dates.
weekData <- data.table(gIm, key = 'weekEnd')
dayData <- data.table(y, key = 'date')
# Keep a copy of the key column under the name `we`; the aggregation below
# groups on this copy (relabelled as weekEnd).
weekData[, we := weekEnd]
# Rolling join: a negative roll rolls the next weekEnd backward, here by at
# most 6 days, so each day is matched to the week that ends on or after it.
# The count column is then summed over the original weekEnd values stored
# in `we`. Note that this assignment reuses the name `we` for the result table.
we <- weekData[dayData,roll= -6,nomatch = NA][, list(count = sum(count)), keyby =list(weekEnd = we)]
# Join the weekly sums back onto the original weekData, then set `NA` values
# in count to 0 and remove the additional `we` column.
# NOTE(review): newer data.table releases may refuse to delete a column
# (`:= NULL`) when a row filter `i` is supplied -- confirm on the version in use.
weekSum <- (we[weekData])[is.na(count), c('count','we') := list(0L,NULL)]
head(weekSum, 10)
weekEnd count immigration weekStart
1: 2004-01-10 0 62 2004-01-04
2: 2004-01-17 0 53 2004-01-11
3: 2004-01-24 1 47 2004-01-18
4: 2004-01-31 0 47 2004-01-25
5: 2004-02-07 0 46 2004-02-01
6: 2004-02-14 0 46 2004-02-08
7: 2004-02-21 0 47 2004-02-15
8: 2004-02-28 0 49 2004-02-22
9: 2004-03-06 1 49 2004-02-29
10: 2004-03-13 2 43 2004-03-07
按有限天数进行滚动的功能是 data.table 1.8.8 新增的特性。摘自 NEWS:
- 除了 TRUE/FALSE 之外,'roll' 现在还可以是正数(向前滚动,即 LOCF)或负数(向后滚动,即 NOCB)。有限的数值限定了一个值最多可以滚动多远(有限的陈旧度)。roll = TRUE 与 roll = +Inf 等价。
编辑 - 一个(可能)更直接的版本
# Key the weekly table on weekStart.
weekData <- data.table(gIm, key = 'weekStart')

# Lookup table that carries a copy (`wk`) of the key. After X[Y] the key
# column holds the *daily* date taken from dayData, so the week each day was
# matched to has to be read from the copy.
weekLookup <- weekData[, list(weekStart, wk = weekStart)]
setkey(weekLookup, weekStart)

# Rolling join: with the key on weekStart the previous weekStart must be
# rolled *forward* (positive roll / LOCF) by at most 6 days so that a day is
# assigned to the week it falls in. A negative roll would attach it to the
# following week. nomatch = 0L drops days that lie outside every week.
dayCounts <- weekLookup[dayData, roll = 6, nomatch = 0L][
  , list(count = sum(count)), by = list(weekStart = wk)]

# Attach the weekly totals to every week; weeks without observations get NA.
weekly <- merge(weekData, dayCounts, all.x = TRUE, by = 'weekStart')
head(weekly, n = 10)
weekStart immigration weekEnd count
1: 2004-01-04 62 2004-01-10 NA
2: 2004-01-11 53 2004-01-17 NA
3: 2004-01-18 47 2004-01-24 1
4: 2004-01-25 47 2004-01-31 NA
5: 2004-02-01 46 2004-02-07 NA
6: 2004-02-08 46 2004-02-14 NA
7: 2004-02-15 47 2004-02-21 NA
8: 2004-02-22 49 2004-02-28 NA
9: 2004-02-29 49 2004-03-06 1
10: 2004-03-07 43 2004-03-13 2