我有这种数据:
library(dplyr)
glimpse(samp)
Observations: 218
Variables: 10
$ date_block_num <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ shop_id <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
$ item_id <int> 33, 482, 491, 839, 1007, 1010, 1023, 1204, 1224, 1247, 1409, ...
$ item_category_id <int> 37, 73, 73, 73, 67, 67, 67, 55, 55, 55, 19, 19, 21, 28, 30, 3...
$ item_cnt_month <dbl> 1, 1, 1, 1, 3, 1, 2, 1, 1, 1, 1, 1, 6, 4, 3, 3, 1, 3, 1, 1, 1...
$ item_price <dbl> 499.0000, 3300.0000, 600.0000, 3300.0000, 449.0000, 449.0000,...
$ item_cnt_month1 <dbl> 52, 7, 7, 7, 28, 28, 28, 41, 41, 41, 114, 114, 50, 37, 223, 2...
$ item_price1 <dbl> 532.0602, 1285.7143, 1285.7143, 1285.7143, 363.5926, 363.5926...
$ item_cnt_month2 <dbl> 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1...
$ item_price2 <dbl> 960.655, 960.655, 960.655, 960.655, 960.655, 960.655, 960.655...
`date_block_num` 是月份编号(每个取值对应一个月)。`shop_id`、`item_id` 和 `item_category_id` 是标识符。`item_cnt_month` 和 `item_price` 是按 `shop_id` 和 `item_id` 汇总的销量和价格。`item_cnt_month1` 和 `item_price1` 是按 `shop_id` 和 `item_category_id` 汇总的销量和价格。`item_cnt_month2` 和 `item_price2` 是按 `shop_id` 汇总的销量和价格。所有汇总均按月(即按 `date_block_num`)进行。
我的问题是如何将数据(价格和销售汇总)滞后一个月?
对于item_cnt_month
和item_price
我做到了:
# Lag the item-level count and price by one row within each shop/item pair.
# `order_by` makes the lag follow date_block_num explicitly instead of
# depending on whatever order the rows happen to arrive in, and ungroup()
# keeps the grouping from leaking into later steps.
samp %>%
  group_by(shop_id, item_id) %>%
  mutate(
    lag_price = lag(item_price, 1, order_by = date_block_num),
    lag_item = lag(item_cnt_month, 1, order_by = date_block_num)
  ) %>%
  ungroup()
结果,`item_price` 和 `item_cnt_month` 被“滞后”了一个月。这种做法之所以可行,是因为每个 `shop_id` 和 `item_id` 组合在每个月(`date_block_num`)中只出现一次。
但是,对于 `item_cnt_month1/2` 和 `item_price1/2`,这种做法不起作用,因为这些变量的汇总粒度不同:同一个值在每个月中可能出现多次。有什么办法可以做到这一点吗?
dput(samp)
structure(list(date_block_num = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
shop_id = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), item_id = c(33L,
482L, 491L, 839L, 1007L, 1010L, 1023L, 1204L, 1224L, 1247L,
1409L, 1467L, 1523L, 1829L, 1905L, 1916L, 2252L, 2308L, 2416L,
2678L, 2808L, 2939L, 2946L, 3007L, 3076L, 3077L, 3141L, 3146L,
3158L, 3159L, 3160L, 3316L, 3328L, 3554L, 3556L, 3656L, 3686L,
3719L, 3851L, 4240L, 4241L, 4244L, 4248L, 4249L, 4531L, 4901L,
4906L, 5272L, 5572L, 5583L, 5643L, 5811L, 5822L, 5945L, 6007L,
6185L, 6238L, 6432L, 6488L, 6727L, 6953L, 7220L, 7856L, 7893L,
7894L, 7895L, 7956L, 8093L, 8094L, 8095L, 8528L, 8571L, 8743L,
8958L, 9507L, 10039L, 10391L, 10423L, 10669L, 10904L, 10930L,
11304L, 11391L, 12134L, 12135L, 12286L, 12361L, 12828L, 12830L,
12964L, 13040L, 13511L, 13923L, 14140L, 14215L, 14223L, 14252L,
14386L, 14447L, 15104L, 15118L, 15134L, 15456L, 15458L, 15702L,
15833L, 15836L, 16147L, 16154L, 16159L, 16167L, 16205L, 16206L,
16219L, 16227L, 16450L, 16451L, 16459L, 16786L, 16987L, 17481L,
17625L, 17717L, 17756L, 18059L, 18255L, 18378L, 18669L, 19046L,
19597L, 19674L, 19778L, 20212L, 20377L, 20518L, 20881L, 22087L,
22088L, 22091L, 22092L, 31L, 482L, 496L, 835L, 839L, 1007L,
1829L, 1916L, 2252L, 2308L, 2416L, 2445L, 2678L, 2753L, 2808L,
2956L, 2972L, 3007L, 3076L, 3077L, 3141L, 3148L, 3158L, 3159L,
3328L, 3394L, 3554L, 3556L, 3676L, 3851L, 4249L, 4790L, 4901L,
4906L, 4907L, 5272L, 5380L, 5572L, 5583L, 5811L, 5821L, 6156L,
6488L, 6740L, 7882L, 7893L, 7894L, 7895L, 8093L, 8528L, 8958L,
9001L, 9465L, 11391L, 11607L, 11655L, 12134L, 12135L, 12208L,
12286L, 12360L, 12839L, 13460L, 13511L, 14447L, 14931L, 16167L,
16184L, 16205L, 16210L, 16219L, 16315L, 17481L, 17717L, 19344L,
19674L, 19813L, 22088L), item_category_id = c(37L, 73L, 73L,
73L, 67L, 67L, 67L, 55L, 55L, 55L, 19L, 19L, 21L, 28L, 30L,
30L, 30L, 30L, 30L, 23L, 30L, 21L, 21L, 75L, 19L, 23L, 75L,
75L, 75L, 75L, 75L, 19L, 23L, 23L, 23L, 55L, 19L, 30L, 19L,
23L, 23L, 23L, 23L, 23L, 55L, 22L, 19L, 30L, 2L, 5L, 2L,
30L, 35L, 55L, 55L, 30L, 55L, 28L, 28L, 19L, 28L, 22L, 28L,
6L, 6L, 6L, 6L, 38L, 37L, 37L, 75L, 55L, 40L, 37L, 40L, 37L,
55L, 38L, 67L, 55L, 55L, 30L, 19L, 30L, 30L, 62L, 40L, 40L,
38L, 55L, 38L, 19L, 40L, 37L, 57L, 57L, 55L, 40L, 40L, 62L,
67L, 67L, 63L, 63L, 37L, 40L, 40L, 64L, 64L, 64L, 64L, 65L,
64L, 64L, 65L, 40L, 37L, 55L, 43L, 37L, 38L, 43L, 79L, 43L,
49L, 55L, 55L, 55L, 55L, 40L, 38L, 38L, 37L, 72L, 72L, 72L,
83L, 83L, 83L, 83L, 37L, 73L, 73L, 73L, 73L, 67L, 28L, 30L,
30L, 30L, 30L, 30L, 23L, 30L, 30L, 21L, 30L, 75L, 19L, 23L,
75L, 75L, 75L, 75L, 23L, 23L, 23L, 23L, 23L, 19L, 23L, 30L,
22L, 19L, 23L, 30L, 75L, 2L, 5L, 30L, 35L, 30L, 28L, 30L,
6L, 6L, 6L, 6L, 38L, 75L, 37L, 41L, 49L, 19L, 37L, 41L, 30L,
30L, 62L, 62L, 38L, 40L, 21L, 19L, 40L, 69L, 64L, 65L, 65L,
65L, 64L, 67L, 38L, 79L, 49L, 38L, 37L, 83L), item_cnt_month = c(1,
1, 1, 1, 3, 1, 2, 1, 1, 1, 1, 1, 6, 4, 3, 3, 1, 3, 1, 1,
17, 2, 2, 6, 1, 1, 1, 2, 5, 2, 1, 2, 1, 1, 1, 1, 4, 1, 3,
1, 1, 1, 1, 4, 1, 2, 3, 3, 9, 2, 0, 7, 1, 1, 1, 1, 1, 1,
1, 1, 4, 4, 2, 3, 1, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 4, 3, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
1, 1, 3, 2, 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 2, 3,
3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 1, 1, 4, 4, 1, 1, 2, 1, 1, 2, 2, 1, 4, 1, 8, 2,
7, 4, 1, 2, 2, 3, 4, 1, 1, -1, 1, 4, 1, 2, 1, 1, 2, 1, 1,
3, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 3, 1, 2, 2, 1, 1, 2, 1,
1, 1, 2, 1, 1, 2, 1, 1, 3, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1,
1, 2, 2, 1, 1, 2, 3, 1), item_price = c(499, 3300, 600, 3300,
449, 449, 299, 299, 399, 299, 1398.5, 899, 799, 1499, 249,
249, 599, 799, 449, 999, 998.981481481444, 599, 599, 1790,
1399, 1199, 1130, 2490, 1290, 790, 1590, 1199, 1999, 2198.5,
2199, 299, 898.875, 599, 899, 1999, 1399, 1999, 1999, 1899,
279, 1499, 1698.83333333333, 839, 1532.85714285714, 594.15,
2387.605, 199, 999, 279, 299, 499, 299, 249, 699, 999, 399,
1998.85, 799, 1990, 1490, 899, 6890, 1399, 1068.54, 420.39,
632, 299, 149, 249.5, 149, 299, 199, 1199, 750, 149, 199,
349, 899, 299, 299, 100, 149, 149, 749, 199, 549, 1599, 149,
299, 299, 299, 199, 149, 499, 1289, 170, 170, 549, 449, 549,
149, 149, 549, 639, 629, 659, 299, 599, 399, 379, 399, 499,
299, 249, 299, 1178.61, 299, 99, 349, 249, 199, 199, 199,
299, 149, 1399, 549, 68.75, 1199, 1229, 649, 79, 79, 109,
109, 699, 3300, 3680, 4600, 3300, 449, 1499, 249, 599, 824,
449, 1199, 999, 1199, 999, 419, 599, 1790, 968.566666666667,
1086.6175, 904, 1850, 1290, 790, 1416.01, 1599, 2199, 1599,
1599, 899, 1599, 299, 1499, 1499, 1499, 508.26, 3590, 1590,
699, 199, 599, 298, 699, 465.666666666667, 1390, 1690, 1490,
799, 1399, 790, 499, 6799, 300, 899, 299, 1999, 299, 299,
2600, 100, 549, 149, 1049, 1599, 499, 629, 659, 249, 299,
329, 399, 949, 1324.25, 1499, 400, 1324.25, 530.666666666667,
79), item_cnt_month1 = c(52, 7, 7, 7, 28, 28, 28, 41, 41,
41, 114, 114, 50, 37, 223, 223, 223, 223, 223, 35, 223, 50,
50, 54, 114, 35, 54, 54, 54, 54, 54, 114, 35, 35, 35, 41,
114, 223, 114, 35, 35, 35, 35, 35, 41, 32, 114, 223, 25,
10, 25, 223, 1, 41, 41, 223, 41, 37, 37, 114, 37, 32, 37,
12, 12, 12, 12, 17, 52, 52, 54, 41, 175, 52, 175, 52, 41,
17, 28, 41, 41, 223, 114, 223, 223, 21, 175, 175, 17, 41,
17, 114, 175, 52, 12, 12, 41, 175, 175, 21, 28, 28, 18, 18,
52, 175, 175, 11, 11, 11, 11, 8, 11, 11, 8, 175, 52, 41,
17, 52, 17, 17, 1, 17, 8, 41, 41, 41, 41, 175, 17, 17, 52,
8, 8, 8, 10, 10, 10, 10, 21, 10, 10, 10, 10, 6, 24, 115,
115, 115, 115, 115, 37, 115, 115, 17, 115, 22, 70, 37, 22,
22, 22, 22, 37, 37, 37, 37, 37, 70, 37, 115, 8, 70, 37, 115,
22, 21, 4, 115, 1, 115, 24, 115, 8, 8, 8, 8, 13, 22, 21,
4, 5, 70, 21, 4, 115, 115, 10, 10, 13, 40, 17, 70, 40, 2,
5, 7, 7, 7, 5, 6, 13, 1, 5, 13, 21, 1), item_price1 = c(532.060196078431,
1285.71428571429, 1285.71428571429, 1285.71428571429, 363.592592592593,
363.592592592593, 363.592592592593, 256.560975609756, 256.560975609756,
256.560975609756, 1516.5925, 1516.5925, 633.69387755102,
788.162162162162, 404.963101160861, 404.963101160861, 404.963101160861,
404.963101160861, 404.963101160861, 1924.71428571429, 404.963101160861,
633.69387755102, 633.69387755102, 1520.99375, 1516.5925,
1924.71428571429, 1520.99375, 1520.99375, 1520.99375, 1520.99375,
1520.99375, 1516.5925, 1924.71428571429, 1924.71428571429,
1924.71428571429, 256.560975609756, 1516.5925, 404.963101160861,
1516.5925, 1924.71428571429, 1924.71428571429, 1924.71428571429,
1924.71428571429, 1924.71428571429, 256.560975609756, 1729.99827586207,
1516.5925, 404.963101160861, 2027.2492, 1206.21111111111,
2027.2492, 404.963101160861, 999, 256.560975609756, 256.560975609756,
404.963101160861, 256.560975609756, 788.162162162162, 788.162162162162,
1516.5925, 788.162162162162, 1729.99827586207, 788.162162162162,
2402.36363636364, 2402.36363636364, 2402.36363636364, 2402.36363636364,
1038.97705882353, 532.060196078431, 532.060196078431, 1520.99375,
256.560975609756, 219.447413793103, 532.060196078431, 219.447413793103,
532.060196078431, 256.560975609756, 1038.97705882353, 363.592592592593,
256.560975609756, 256.560975609756, 404.963101160861, 1516.5925,
404.963101160861, 404.963101160861, 1048, 219.447413793103,
219.447413793103, 1038.97705882353, 256.560975609756, 1038.97705882353,
1516.5925, 219.447413793103, 532.060196078431, 294.833333333333,
294.833333333333, 256.560975609756, 219.447413793103, 219.447413793103,
1048, 363.592592592593, 363.592592592593, 588.783333333333,
588.783333333333, 532.060196078431, 219.447413793103, 219.447413793103,
540.818181818182, 540.818181818182, 540.818181818182, 540.818181818182,
307, 540.818181818182, 540.818181818182, 307, 219.447413793103,
532.060196078431, 256.560975609756, 322.529411764706, 532.060196078431,
1038.97705882353, 322.529411764706, 99, 322.529411764706,
340.75, 256.560975609756, 256.560975609756, 256.560975609756,
256.560975609756, 219.447413793103, 1038.97705882353, 1038.97705882353,
532.060196078431, 1059, 1059, 1059, 90.8333333333333, 90.8333333333333,
90.8333333333333, 90.8333333333333, 577.333333333333, 2331.11111111111,
2331.11111111111, 2331.11111111111, 2331.11111111111, 799,
739.350877192632, 499.204935897433, 499.204935897433, 499.204935897433,
499.204935897433, 499.204935897433, 1459.33823529412, 499.204935897433,
499.204935897433, 593.375, 499.204935897433, 1421.08695652174,
1407.95967741935, 1459.33823529412, 1421.08695652174, 1421.08695652174,
1421.08695652174, 1421.08695652174, 1459.33823529412, 1459.33823529412,
1459.33823529412, 1459.33823529412, 1459.33823529412, 1407.95967741935,
1459.33823529412, 499.204935897433, 1586.25, 1407.95967741935,
1459.33823529412, 499.204935897433, 1421.08695652174, 1640.21052631579,
1069.5, 499.204935897433, 599, 499.204935897433, 739.350877192632,
499.204935897433, 1122.42857142857, 1122.42857142857, 1122.42857142857,
1122.42857142857, 864.461538461538, 1421.08695652174, 577.333333333333,
2499, 349.8, 1407.95967741935, 577.333333333333, 2499, 499.204935897433,
499.204935897433, 671.555555555556, 671.555555555556, 864.461538461538,
260.25, 593.375, 1407.95967741935, 260.25, 629, 795, 274.714285714286,
274.714285714286, 274.714285714286, 795, 799, 864.461538461538,
1499, 349.8, 864.461538461538, 577.333333333333, 79), item_cnt_month2 = c(1146,
1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146,
1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146,
1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146,
1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146,
1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146,
1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146,
1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146,
1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146,
1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146,
1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146,
1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146,
1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146,
1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146,
1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 488,
488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488,
488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488,
488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488,
488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488,
488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488,
488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488,
488, 488, 488, 488, 488), item_price2 = c(960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481,
960.654951758481, 960.654951758481, 960.654951758481, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672,
1062.43718061672)), row.names = c(NA, -218L), class = c("tbl_df",
"tbl", "data.frame"))
答案 0 :(得分:1)
可能是
library(tidyverse)
# Reshape every count/price column to long form, lag each measure by one row
# within its shop/item series, then reshape back to the original wide layout.
# The first row of every series has no predecessor and therefore becomes NA.
res <- samp %>%
  gather("Factor", "value", contains("month"), contains("price")) %>%
  # shop_id must be part of the key: without it, rows for the same item sold
  # in different shops would be lagged into one another.
  group_by(Factor, shop_id, item_category_id, item_id) %>%
  # arrange() sorts the whole frame, which also puts each group in month order.
  arrange(date_block_num) %>%
  mutate(value = lag(value, 1)) %>%
  # Drop the grouping before reshaping so the result is a plain tibble.
  ungroup() %>%
  spread("Factor", "value")
此处 `date_block_num` 为 1 的行中,`item_price2` 的汇总平均值为 960.655。
答案 1 :(得分:1)
我认为关键在于 `lag()` 取的是数据框中的上一行,或者至少是组内的上一行;它不会按值去查找。
所以,要得到(我认为)你想要的结果,一种方法是先按分组变量用 `arrange()` 对数据框排序。
# lag() returns the previous row of the group, so the rows of every shop/item
# pair must be in month order first; date_block_num is therefore part of the
# sort key rather than being left to the incoming row order.
samp %>%
  group_by(shop_id, item_id) %>%
  arrange(shop_id, item_id, date_block_num) %>%
  mutate(lag_item_price2 = lag(item_price2)) %>%
  # Return an ungrouped frame so later verbs are not silently applied by group.
  ungroup()
您把按不同方式分组的数据混在了一起。有些列(例如 `item_cnt_month` 和 `item_price`)需要按 `item_id` 和 `shop_id` 分组;另一些列(`item_cnt_month1` 和 `item_price1`)需要按 `shop_id` 和 `item_category_id` 分组;其余的列则属于第三种分组。
这种数据格式并不自然,因此要实现您的目标会相当复杂。我们必须:(a) 使用 `gather()` 将数据转换为长格式;(b) 将数据过滤三次,把分组方式相同的每组变量分别隔离出来;(c) 对长数据进行汇总和排序,以便调用 `lag()` 得到预期的输出——输出是三个大小不等的独立数据框;最后,(d) 把数据重新整理成与您的输入一致的格式。
library(tidyverse)
# Measure columns, split by the key they are later grouped on.
by_shop_and_item <- c("item_cnt_month", "item_price")
by_shop_and_category <- c("item_cnt_month1", "item_price1")
by_shop <- c("item_cnt_month2", "item_price2")

# Stack every count/price column into factor/value pairs (long format);
# the identifier columns are repeated on each stacked row.
long <- gather(samp, factor, value, contains("month"), contains("price"))
# One grouped long table per set of measure columns. date_block_num is always
# the last grouping variable, after the identifier columns and `factor`.

# Measures keyed by shop only.
by_shop_df <- filter(long, factor %in% by_shop) %>%
  group_by(shop_id, factor, date_block_num)

# Measures keyed by shop and item category.
by_shop_and_category_df <- filter(long, factor %in% by_shop_and_category) %>%
  group_by(shop_id, item_category_id, factor, date_block_num)

# Measures keyed by shop and item.
by_shop_and_item_df <- filter(long, factor %in% by_shop_and_item) %>%
  group_by(shop_id, item_id, factor, date_block_num)
#' Lag the measures of one grouped long table by exactly one month
#'
#' Collapses each group to a single value, shifts that value one month
#' forward within each remaining group, and spreads the `factor` column back
#' out into one column per measure.
#'
#' @param df A long data frame with `factor`, `value` and `date_block_num`
#'   columns, grouped so that `date_block_num` is the LAST grouping variable
#'   (summarize() peels off that last level, leaving one series per key).
#' @return An ungrouped data frame with one row per key and `date_block_num`,
#'   and one column per distinct `factor` holding the previous month's value.
#'   The value is NA when the series has no row for the month immediately
#'   before.
lag_my_value <- function(df) {
  df %>%
    # Every row of a group carries the same aggregate, so keep the first.
    summarize(value = first(value)) %>%
    # Put each series in month order so lag() looks at the preceding month row.
    arrange(date_block_num) %>%
    mutate(
      # lag() returns the previous ROW, which is only the previous MONTH when
      # no month is missing from the series; blank out the value wherever the
      # preceding row is not exactly one date_block_num earlier.
      value = replace(
        lag(value, 1),
        which(lag(date_block_num, 1) != date_block_num - 1),
        NA
      )
    ) %>%
    # Drop the remaining grouping before reshaping back to wide format.
    ungroup() %>%
    spread(factor, value)
}
# Apply lag_my_value() to each grouped long table in turn.
my_dfs <- list(by_shop_df, by_shop_and_category_df, by_shop_and_item_df)
my_lagged_dfs <- lapply(my_dfs, lag_my_value)

# Start from the identifier columns of the original rows and attach the three
# lagged tables one after another, each joined on its own key plus the month.
final_answer <- samp %>%
  select(date_block_num, shop_id, item_category_id, item_id) %>%
  left_join(
    my_lagged_dfs[[1]],
    by = c("shop_id", "date_block_num")
  ) %>%
  left_join(
    my_lagged_dfs[[2]],
    by = c("shop_id", "item_category_id", "date_block_num")
  ) %>%
  left_join(
    my_lagged_dfs[[3]],
    by = c("shop_id", "item_id", "date_block_num")
  )
final_answer