计算汇总数据的滞后

时间:2019-04-09 12:34:48

标签: r dplyr

我有这种数据:

library(dplyr)
glimpse(samp)
Observations: 218
Variables: 10
$ date_block_num   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ shop_id          <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
$ item_id          <int> 33, 482, 491, 839, 1007, 1010, 1023, 1204, 1224, 1247, 1409, ...
$ item_category_id <int> 37, 73, 73, 73, 67, 67, 67, 55, 55, 55, 19, 19, 21, 28, 30, 3...
$ item_cnt_month   <dbl> 1, 1, 1, 1, 3, 1, 2, 1, 1, 1, 1, 1, 6, 4, 3, 3, 1, 3, 1, 1, 1...
$ item_price       <dbl> 499.0000, 3300.0000, 600.0000, 3300.0000, 449.0000, 449.0000,...
$ item_cnt_month1  <dbl> 52, 7, 7, 7, 28, 28, 28, 41, 41, 41, 114, 114, 50, 37, 223, 2...
$ item_price1      <dbl> 532.0602, 1285.7143, 1285.7143, 1285.7143, 363.5926, 363.5926...
$ item_cnt_month2  <dbl> 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1...
$ item_price2      <dbl> 960.655, 960.655, 960.655, 960.655, 960.655, 960.655, 960.655...

date_block_num 表示月份(某一年中的某个月)。shop_id、item_id、item_category_id 是标识符。item_cnt_month 和 item_price 是按 shop_id 和 item_id 汇总的销量和价格。item_cnt_month1 和 item_price1 是按 shop_id 和 item_category_id 汇总的销量和价格。item_cnt_month2 和 item_price2 是按 shop_id 汇总的销量和价格。所有汇总都是按月(即按每个 date_block_num)进行的。

我的问题是如何将数据(价格和销售汇总)滞后一个月?

对于 item_cnt_month 和 item_price,我是这样做的:

# Lag the item-level monthly price and sales count by one row within each
# (shop_id, item_id) series.
samp %>%
  group_by(shop_id, item_id) %>%
  # order_by makes the lag follow date_block_num explicitly instead of relying
  # on how the rows of samp happen to be sorted.
  mutate(lag_price = lag(item_price, 1, order_by = date_block_num),
         lag_item = lag(item_cnt_month, 1, order_by = date_block_num)) %>%
  # Drop the grouping so later verbs are not silently applied per group.
  ungroup()

结果,item_price 和 item_cnt_month 被“滞后”了一个月。这种做法之所以可行,是因为每个 shop_id 和 item_id 的组合在每个月(date_block_num)中只出现一次。

但是,对于 item_cnt_month1/2 和 item_price1/2,这种做法不起作用,因为这些变量的情况不同:同一个汇总值每个月可能出现多次。有什么办法可以做到这一点吗?

dput(samp)
structure(list(date_block_num = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 
    shop_id = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), item_id = c(33L, 
    482L, 491L, 839L, 1007L, 1010L, 1023L, 1204L, 1224L, 1247L, 
    1409L, 1467L, 1523L, 1829L, 1905L, 1916L, 2252L, 2308L, 2416L, 
    2678L, 2808L, 2939L, 2946L, 3007L, 3076L, 3077L, 3141L, 3146L, 
    3158L, 3159L, 3160L, 3316L, 3328L, 3554L, 3556L, 3656L, 3686L, 
    3719L, 3851L, 4240L, 4241L, 4244L, 4248L, 4249L, 4531L, 4901L, 
    4906L, 5272L, 5572L, 5583L, 5643L, 5811L, 5822L, 5945L, 6007L, 
    6185L, 6238L, 6432L, 6488L, 6727L, 6953L, 7220L, 7856L, 7893L, 
    7894L, 7895L, 7956L, 8093L, 8094L, 8095L, 8528L, 8571L, 8743L, 
    8958L, 9507L, 10039L, 10391L, 10423L, 10669L, 10904L, 10930L, 
    11304L, 11391L, 12134L, 12135L, 12286L, 12361L, 12828L, 12830L, 
    12964L, 13040L, 13511L, 13923L, 14140L, 14215L, 14223L, 14252L, 
    14386L, 14447L, 15104L, 15118L, 15134L, 15456L, 15458L, 15702L, 
    15833L, 15836L, 16147L, 16154L, 16159L, 16167L, 16205L, 16206L, 
    16219L, 16227L, 16450L, 16451L, 16459L, 16786L, 16987L, 17481L, 
    17625L, 17717L, 17756L, 18059L, 18255L, 18378L, 18669L, 19046L, 
    19597L, 19674L, 19778L, 20212L, 20377L, 20518L, 20881L, 22087L, 
    22088L, 22091L, 22092L, 31L, 482L, 496L, 835L, 839L, 1007L, 
    1829L, 1916L, 2252L, 2308L, 2416L, 2445L, 2678L, 2753L, 2808L, 
    2956L, 2972L, 3007L, 3076L, 3077L, 3141L, 3148L, 3158L, 3159L, 
    3328L, 3394L, 3554L, 3556L, 3676L, 3851L, 4249L, 4790L, 4901L, 
    4906L, 4907L, 5272L, 5380L, 5572L, 5583L, 5811L, 5821L, 6156L, 
    6488L, 6740L, 7882L, 7893L, 7894L, 7895L, 8093L, 8528L, 8958L, 
    9001L, 9465L, 11391L, 11607L, 11655L, 12134L, 12135L, 12208L, 
    12286L, 12360L, 12839L, 13460L, 13511L, 14447L, 14931L, 16167L, 
    16184L, 16205L, 16210L, 16219L, 16315L, 17481L, 17717L, 19344L, 
    19674L, 19813L, 22088L), item_category_id = c(37L, 73L, 73L, 
    73L, 67L, 67L, 67L, 55L, 55L, 55L, 19L, 19L, 21L, 28L, 30L, 
    30L, 30L, 30L, 30L, 23L, 30L, 21L, 21L, 75L, 19L, 23L, 75L, 
    75L, 75L, 75L, 75L, 19L, 23L, 23L, 23L, 55L, 19L, 30L, 19L, 
    23L, 23L, 23L, 23L, 23L, 55L, 22L, 19L, 30L, 2L, 5L, 2L, 
    30L, 35L, 55L, 55L, 30L, 55L, 28L, 28L, 19L, 28L, 22L, 28L, 
    6L, 6L, 6L, 6L, 38L, 37L, 37L, 75L, 55L, 40L, 37L, 40L, 37L, 
    55L, 38L, 67L, 55L, 55L, 30L, 19L, 30L, 30L, 62L, 40L, 40L, 
    38L, 55L, 38L, 19L, 40L, 37L, 57L, 57L, 55L, 40L, 40L, 62L, 
    67L, 67L, 63L, 63L, 37L, 40L, 40L, 64L, 64L, 64L, 64L, 65L, 
    64L, 64L, 65L, 40L, 37L, 55L, 43L, 37L, 38L, 43L, 79L, 43L, 
    49L, 55L, 55L, 55L, 55L, 40L, 38L, 38L, 37L, 72L, 72L, 72L, 
    83L, 83L, 83L, 83L, 37L, 73L, 73L, 73L, 73L, 67L, 28L, 30L, 
    30L, 30L, 30L, 30L, 23L, 30L, 30L, 21L, 30L, 75L, 19L, 23L, 
    75L, 75L, 75L, 75L, 23L, 23L, 23L, 23L, 23L, 19L, 23L, 30L, 
    22L, 19L, 23L, 30L, 75L, 2L, 5L, 30L, 35L, 30L, 28L, 30L, 
    6L, 6L, 6L, 6L, 38L, 75L, 37L, 41L, 49L, 19L, 37L, 41L, 30L, 
    30L, 62L, 62L, 38L, 40L, 21L, 19L, 40L, 69L, 64L, 65L, 65L, 
    65L, 64L, 67L, 38L, 79L, 49L, 38L, 37L, 83L), item_cnt_month = c(1, 
    1, 1, 1, 3, 1, 2, 1, 1, 1, 1, 1, 6, 4, 3, 3, 1, 3, 1, 1, 
    17, 2, 2, 6, 1, 1, 1, 2, 5, 2, 1, 2, 1, 1, 1, 1, 4, 1, 3, 
    1, 1, 1, 1, 4, 1, 2, 3, 3, 9, 2, 0, 7, 1, 1, 1, 1, 1, 1, 
    1, 1, 4, 4, 2, 3, 1, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 4, 3, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 
    1, 1, 3, 2, 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 2, 3, 
    3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 2, 1, 1, 4, 4, 1, 1, 2, 1, 1, 2, 2, 1, 4, 1, 8, 2, 
    7, 4, 1, 2, 2, 3, 4, 1, 1, -1, 1, 4, 1, 2, 1, 1, 2, 1, 1, 
    3, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 3, 1, 2, 2, 1, 1, 2, 1, 
    1, 1, 2, 1, 1, 2, 1, 1, 3, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 
    1, 2, 2, 1, 1, 2, 3, 1), item_price = c(499, 3300, 600, 3300, 
    449, 449, 299, 299, 399, 299, 1398.5, 899, 799, 1499, 249, 
    249, 599, 799, 449, 999, 998.981481481444, 599, 599, 1790, 
    1399, 1199, 1130, 2490, 1290, 790, 1590, 1199, 1999, 2198.5, 
    2199, 299, 898.875, 599, 899, 1999, 1399, 1999, 1999, 1899, 
    279, 1499, 1698.83333333333, 839, 1532.85714285714, 594.15, 
    2387.605, 199, 999, 279, 299, 499, 299, 249, 699, 999, 399, 
    1998.85, 799, 1990, 1490, 899, 6890, 1399, 1068.54, 420.39, 
    632, 299, 149, 249.5, 149, 299, 199, 1199, 750, 149, 199, 
    349, 899, 299, 299, 100, 149, 149, 749, 199, 549, 1599, 149, 
    299, 299, 299, 199, 149, 499, 1289, 170, 170, 549, 449, 549, 
    149, 149, 549, 639, 629, 659, 299, 599, 399, 379, 399, 499, 
    299, 249, 299, 1178.61, 299, 99, 349, 249, 199, 199, 199, 
    299, 149, 1399, 549, 68.75, 1199, 1229, 649, 79, 79, 109, 
    109, 699, 3300, 3680, 4600, 3300, 449, 1499, 249, 599, 824, 
    449, 1199, 999, 1199, 999, 419, 599, 1790, 968.566666666667, 
    1086.6175, 904, 1850, 1290, 790, 1416.01, 1599, 2199, 1599, 
    1599, 899, 1599, 299, 1499, 1499, 1499, 508.26, 3590, 1590, 
    699, 199, 599, 298, 699, 465.666666666667, 1390, 1690, 1490, 
    799, 1399, 790, 499, 6799, 300, 899, 299, 1999, 299, 299, 
    2600, 100, 549, 149, 1049, 1599, 499, 629, 659, 249, 299, 
    329, 399, 949, 1324.25, 1499, 400, 1324.25, 530.666666666667, 
    79), item_cnt_month1 = c(52, 7, 7, 7, 28, 28, 28, 41, 41, 
    41, 114, 114, 50, 37, 223, 223, 223, 223, 223, 35, 223, 50, 
    50, 54, 114, 35, 54, 54, 54, 54, 54, 114, 35, 35, 35, 41, 
    114, 223, 114, 35, 35, 35, 35, 35, 41, 32, 114, 223, 25, 
    10, 25, 223, 1, 41, 41, 223, 41, 37, 37, 114, 37, 32, 37, 
    12, 12, 12, 12, 17, 52, 52, 54, 41, 175, 52, 175, 52, 41, 
    17, 28, 41, 41, 223, 114, 223, 223, 21, 175, 175, 17, 41, 
    17, 114, 175, 52, 12, 12, 41, 175, 175, 21, 28, 28, 18, 18, 
    52, 175, 175, 11, 11, 11, 11, 8, 11, 11, 8, 175, 52, 41, 
    17, 52, 17, 17, 1, 17, 8, 41, 41, 41, 41, 175, 17, 17, 52, 
    8, 8, 8, 10, 10, 10, 10, 21, 10, 10, 10, 10, 6, 24, 115, 
    115, 115, 115, 115, 37, 115, 115, 17, 115, 22, 70, 37, 22, 
    22, 22, 22, 37, 37, 37, 37, 37, 70, 37, 115, 8, 70, 37, 115, 
    22, 21, 4, 115, 1, 115, 24, 115, 8, 8, 8, 8, 13, 22, 21, 
    4, 5, 70, 21, 4, 115, 115, 10, 10, 13, 40, 17, 70, 40, 2, 
    5, 7, 7, 7, 5, 6, 13, 1, 5, 13, 21, 1), item_price1 = c(532.060196078431, 
    1285.71428571429, 1285.71428571429, 1285.71428571429, 363.592592592593, 
    363.592592592593, 363.592592592593, 256.560975609756, 256.560975609756, 
    256.560975609756, 1516.5925, 1516.5925, 633.69387755102, 
    788.162162162162, 404.963101160861, 404.963101160861, 404.963101160861, 
    404.963101160861, 404.963101160861, 1924.71428571429, 404.963101160861, 
    633.69387755102, 633.69387755102, 1520.99375, 1516.5925, 
    1924.71428571429, 1520.99375, 1520.99375, 1520.99375, 1520.99375, 
    1520.99375, 1516.5925, 1924.71428571429, 1924.71428571429, 
    1924.71428571429, 256.560975609756, 1516.5925, 404.963101160861, 
    1516.5925, 1924.71428571429, 1924.71428571429, 1924.71428571429, 
    1924.71428571429, 1924.71428571429, 256.560975609756, 1729.99827586207, 
    1516.5925, 404.963101160861, 2027.2492, 1206.21111111111, 
    2027.2492, 404.963101160861, 999, 256.560975609756, 256.560975609756, 
    404.963101160861, 256.560975609756, 788.162162162162, 788.162162162162, 
    1516.5925, 788.162162162162, 1729.99827586207, 788.162162162162, 
    2402.36363636364, 2402.36363636364, 2402.36363636364, 2402.36363636364, 
    1038.97705882353, 532.060196078431, 532.060196078431, 1520.99375, 
    256.560975609756, 219.447413793103, 532.060196078431, 219.447413793103, 
    532.060196078431, 256.560975609756, 1038.97705882353, 363.592592592593, 
    256.560975609756, 256.560975609756, 404.963101160861, 1516.5925, 
    404.963101160861, 404.963101160861, 1048, 219.447413793103, 
    219.447413793103, 1038.97705882353, 256.560975609756, 1038.97705882353, 
    1516.5925, 219.447413793103, 532.060196078431, 294.833333333333, 
    294.833333333333, 256.560975609756, 219.447413793103, 219.447413793103, 
    1048, 363.592592592593, 363.592592592593, 588.783333333333, 
    588.783333333333, 532.060196078431, 219.447413793103, 219.447413793103, 
    540.818181818182, 540.818181818182, 540.818181818182, 540.818181818182, 
    307, 540.818181818182, 540.818181818182, 307, 219.447413793103, 
    532.060196078431, 256.560975609756, 322.529411764706, 532.060196078431, 
    1038.97705882353, 322.529411764706, 99, 322.529411764706, 
    340.75, 256.560975609756, 256.560975609756, 256.560975609756, 
    256.560975609756, 219.447413793103, 1038.97705882353, 1038.97705882353, 
    532.060196078431, 1059, 1059, 1059, 90.8333333333333, 90.8333333333333, 
    90.8333333333333, 90.8333333333333, 577.333333333333, 2331.11111111111, 
    2331.11111111111, 2331.11111111111, 2331.11111111111, 799, 
    739.350877192632, 499.204935897433, 499.204935897433, 499.204935897433, 
    499.204935897433, 499.204935897433, 1459.33823529412, 499.204935897433, 
    499.204935897433, 593.375, 499.204935897433, 1421.08695652174, 
    1407.95967741935, 1459.33823529412, 1421.08695652174, 1421.08695652174, 
    1421.08695652174, 1421.08695652174, 1459.33823529412, 1459.33823529412, 
    1459.33823529412, 1459.33823529412, 1459.33823529412, 1407.95967741935, 
    1459.33823529412, 499.204935897433, 1586.25, 1407.95967741935, 
    1459.33823529412, 499.204935897433, 1421.08695652174, 1640.21052631579, 
    1069.5, 499.204935897433, 599, 499.204935897433, 739.350877192632, 
    499.204935897433, 1122.42857142857, 1122.42857142857, 1122.42857142857, 
    1122.42857142857, 864.461538461538, 1421.08695652174, 577.333333333333, 
    2499, 349.8, 1407.95967741935, 577.333333333333, 2499, 499.204935897433, 
    499.204935897433, 671.555555555556, 671.555555555556, 864.461538461538, 
    260.25, 593.375, 1407.95967741935, 260.25, 629, 795, 274.714285714286, 
    274.714285714286, 274.714285714286, 795, 799, 864.461538461538, 
    1499, 349.8, 864.461538461538, 577.333333333333, 79), item_cnt_month2 = c(1146, 
    1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 
    1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 
    1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 
    1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 
    1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 
    1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 
    1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 
    1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 
    1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 
    1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 
    1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 
    1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 
    1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 
    1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 1146, 488, 
    488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 
    488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 
    488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 
    488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 
    488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 
    488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 488, 
    488, 488, 488, 488, 488), item_price2 = c(960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 960.654951758481, 
    960.654951758481, 960.654951758481, 960.654951758481, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672, 1062.43718061672, 1062.43718061672, 1062.43718061672, 
    1062.43718061672)), row.names = c(NA, -218L), class = c("tbl_df", 
"tbl", "data.frame"))

2 个答案:

答案 0 :(得分:1)

可能是

library(tidyverse)

res <- samp %>%
  # Stack every *month* / *price* column into Factor/value pairs.
  gather("Factor", "value", contains("month"), contains("price")) %>%
  # shop_id is part of the series key: without it, the same item sold in two
  # different shops would be lagged across shops.
  group_by(Factor, shop_id, item_category_id, item_id) %>%
  arrange(date_block_num) %>%
  # Previous row of the same series, in date_block_num order.
  mutate(value = lag(value, 1)) %>%
  # Ungroup so that spread() works on a plain tibble.
  ungroup() %>%
  # Back to one column per measure.
  spread("Factor", "value")

此处第1块的item_price2的汇总平均值为960.655

答案 1 :(得分:1)

第一次尝试

我认为关键在于:lag() 查看的是数据框中的前一行,或者至少是组内的前一行。它不会按值去查找。

因此,要得到(我认为)您想要的结果,一种方法是先按分组变量用 arrange() 对数据框排序。

# Lag item_price2 by one row within each (shop_id, item_id) series.
samp %>%
    group_by(shop_id, item_id) %>%
    # lag() takes the previous row of the group, so the rows of each series
    # must be in month order; sort by date_block_num explicitly.
    arrange(shop_id, item_id, date_block_num) %>%
    mutate(lag_item_price2 = lag(item_price2)) %>%
    # Drop the grouping so later verbs are not silently applied per group.
    ungroup()

第二次尝试

您把分组方式不同的数据混在了一起。有些列(例如 item_cnt_month 和 item_price)需要同时按 item_id 和 shop_id 分组;另一些列(item_cnt_month1 和 item_price1)需要按 shop_id 和 item_category_id 分组;其余的列则属于第三种分组方式。

对您的数据来说,这是一种不自然的格式。因此,要实现您想做的事情会相当复杂。我们必须:(a)使用 gather() 将数据重新整理为长格式;(b)对数据过滤 3 次,把共享相同分组方式的每一组因子分离出来;(c)对长数据重新汇总并排序,这样才能调用 lag() 并得到预期的输出,该输出是三个大小不等的独立数据框;最后,(d)把数据重新整理回与您的输入相同的格式。

library(tidyverse)

# Measure columns, named after the level at which each pair was aggregated.
by_shop <- c("item_cnt_month2", "item_price2")
by_shop_and_category <- c("item_cnt_month1", "item_price1")
by_shop_and_item <- c("item_cnt_month", "item_price")

# Long format: every *month* / *price* column becomes a (factor, value) pair.
long <- gather(samp, factor, value, contains("month"), contains("price"))

# One grouped table per aggregation level. date_block_num is deliberately the
# last grouping variable in each of them.
by_shop_df <- group_by(
    filter(long, factor %in% by_shop),
    shop_id, factor, date_block_num
)

by_shop_and_category_df <- group_by(
    filter(long, factor %in% by_shop_and_category),
    shop_id, item_category_id, factor, date_block_num
)

by_shop_and_item_df <- group_by(
    filter(long, factor %in% by_shop_and_item),
    shop_id, item_id, factor, date_block_num
)

# Lag every measure of a grouped long table by exactly one month.
#
# df: long-format tibble with columns `factor`, `value` and `date_block_num`,
#     grouped so that `date_block_num` is the LAST grouping variable;
#     summarize() then drops that level and leaves one series per remaining
#     group.
# Returns an ungrouped wide tibble with one column per `factor` level, holding
# the value of the immediately preceding month (NA when the series has no row
# for that month).
lag_my_value <- function(df){
    df %>%
    # The aggregate is repeated on every row of a group/month; keep one copy.
    summarize(value = first(value)) %>%
    arrange(date_block_num) %>%
    mutate(
        prev_block = lag(date_block_num, 1),
        # lag() is positional: when a series skips a month, the previous row
        # is older than one month, so keep it only if it is exactly one block
        # earlier; otherwise the lagged value is NA.
        value = replace(
            lag(value, 1),
            is.na(prev_block) | prev_block != date_block_num - 1,
            NA
        )
    ) %>%
    select(-prev_block) %>%
    # Ungroup so that spread() works on a plain tibble.
    ungroup() %>%
    spread(factor, value)
}

# Lag each of the three grouped long tables. The list order matters: the
# joins below pick the elements by position.
my_dfs <- list(by_shop_df, by_shop_and_category_df, by_shop_and_item_df)

my_lagged_dfs <- map(my_dfs, lag_my_value)

# Start from the identifier columns of the input, then attach the lagged
# measures one aggregation level at a time.
final_answer <- select(samp, date_block_num, shop_id, item_category_id, item_id)
final_answer <- left_join(
    final_answer, my_lagged_dfs[[1]],
    by = c('shop_id', 'date_block_num')
)
final_answer <- left_join(
    final_answer, my_lagged_dfs[[2]],
    by = c('shop_id', 'item_category_id', 'date_block_num')
)
final_answer <- left_join(
    final_answer, my_lagged_dfs[[3]],
    by = c('shop_id', 'item_id', 'date_block_num')
)

final_answer