Question

我有如下格式的数据（见附图）：数据中包含多个SKU在多个年份的销售数字，按月分列。

我使用该函数计算了每年的每月指数；

#' Compute the monthly index of every SKU/year row.
#'
#' Each `Month_*` value is divided by the mean of that row's `Month_*`
#' values, so an index of 1 means "equal to the yearly average".
#'
#' @param X A data frame with a `sku` column and one numeric column per
#'   month whose name starts with `Month_`. Other columns (e.g. `Year`)
#'   are passed through unchanged.
#' @return `X` with `sku` converted to character, the `Month_*` columns
#'   replaced by their indices, and a `Yearly_Avg` column holding the
#'   per-row average of the original monthly values.
calc_monthly_all <- function(X) {
  X$sku <- as.character(X$sku)

  # Select the month columns by name instead of by position, so that extra
  # columns (or a Yearly_Avg left over from an earlier call) cannot leak
  # into the average. Matching ignores case, as dplyr::starts_with() does.
  month_cols <- grep("^Month_", names(X), value = TRUE, ignore.case = TRUE)

  X$Yearly_Avg <- rowMeans(X[month_cols])

  # Base-R replacement for the deprecated mutate_at(..., funs(. / Yearly_Avg)).
  for (col in month_cols) {
    X[[col]] <- X[[col]] / X$Yearly_Avg
  }

  X
}

上述数据在R中读入为'df_data'，应用该函数后，输出如下所示：

> calc_monthly_all(df_data)
             sku Year  Month_Jan  Month_Feb Month_March Month_April Month_May Month_June Month_July Month_Aug Month_Sept
1 10929000284004 2015 0.32601608 0.19821778   0.6755053   2.0838948 0.5398826  1.7657031  1.2414692 1.1136709  0.6650728
2 10929000284004 2016 0.92209048 2.52811562   1.4961071   1.8380744 0.6534019  0.7511068  0.5190576 0.2992214  0.1038115
3 10929001124004 2014 0.04678503 0.04318618   0.4847649   0.9385797 0.9781670  0.9353407  0.7924664 1.2894674  1.2070537
4 10929001124004 2015 1.40699537 1.86480849   0.7979080   0.7520179 0.3975936  1.1719218  0.4233502 0.6419178  1.5749569
5 10929001124004 2016 0.92209048 2.52811562   1.4961071   1.8380744 0.6534019  0.7511068  0.5190576 0.2992214  0.1038115
6 10929001124104 2016 1.00160192 0.65078094   0.8191163   0.8358030 1.0112802  0.9419971  0.7209318 1.0449873  1.0337071
7 10929001124104 2017 0.83334681 0.74955923   0.7739514   1.2059589 1.1626741  1.4993773  1.0948676 0.9872054  1.1436520
  Month_Oct Month_Nov Month_Dec Yearly_Avg
1 1.5831341 1.1945229 0.6129102   3834.167
2 1.2029922 0.5434838 1.1425373   1637.583
3 2.2273273 1.7357246 1.3211372  27786.667
4 0.9569818 0.6437317 1.3678164  55131.667
5 1.2029922 0.5434838 1.1425373   1637.583
6 1.3110399 1.3426111 1.2861434 149820.000
7 0.9806706 0.8718438 0.6968927 154557.500

现在，对于每个SKU的每一年，我们都得到了月度指数。接下来，我们需要获取某个特定年份的指数，比方说第一年。也就是说，对于每个SKU，取其各自第一年的月度指数，并返回一个仅包含每个SKU第一年月度指数的数据框。为此，我尝试了

#' Compute the first-year monthly index of every SKU.
#'
#' Monthly indices are computed for every row (each `Month_*` value divided
#' by that row's mean over the `Month_*` columns); then, for each SKU, only
#' the row with the earliest `Year` is kept.
#'
#' The previous implementation called `return()` inside the loop and never
#' updated its accumulator, so it returned the first SKU only. It also took
#' the first row encountered rather than the row with the smallest `Year`.
#'
#' @param X A data frame with `sku` and `Year` columns and one numeric
#'   column per month whose name starts with `Month_`.
#' @return A data frame with one row per SKU (in order of first appearance
#'   in `X`), holding that SKU's earliest-year monthly indices and a
#'   `Yearly_Avg` column. Rows whose `sku` is `NA` are dropped.
calc_monthly_fys <- function(X) {
  X$sku <- as.character(X$sku)

  # Select month columns by name (case-insensitive, like starts_with()).
  month_cols <- grep("^Month_", names(X), value = TRUE, ignore.case = TRUE)
  X$Yearly_Avg <- rowMeans(X[month_cols])
  for (col in month_cols) {
    X[[col]] <- X[[col]] / X$Yearly_Avg
  }

  # Group row numbers by SKU; fixing the factor levels keeps the SKUs in
  # their order of first appearance.
  rows_by_sku <- split(
    seq_len(nrow(X)),
    factor(X$sku, levels = unique(X$sku))
  )

  # For each SKU pick the row with the smallest Year. order() sorts NA last,
  # so a row is always selected even if some years are missing.
  first_rows <- vapply(
    rows_by_sku,
    function(idx) idx[order(X$Year[idx])[1L]],
    integer(1)
  )

  X[unname(first_rows), , drop = FALSE]
}

但是，它没有给出正确的结果，只有第一个SKU出现。

calc_monthly_fys(df_data)
             sku Year Month_Jan Month_Feb Month_March Month_April Month_May Month_June Month_July Month_Aug Month_Sept
1 10929000284004 2015 0.3260161 0.1982178   0.6755053    2.083895 0.5398826   1.765703   1.241469  1.113671  0.6650728
  Month_Oct Month_Nov Month_Dec Yearly_Avg
1  1.583134  1.194523 0.6129102   3834.167

所有SKU都需要这个。

在此示例中，有3个唯一的SKU，但数据的SKU可能为'n'。最后，我需要的输出应采用以下格式；

列名称（SKU_1至SKU_N）应动态出现（如果我们有四个唯一的SKU，则四个SKU名称应作为列名称出现）。 _FYI在这里表示“第一年索引”，我认为我们可以使用paste函数在最终输出表中的每个唯一SKU名称后附加后缀_FYI。

请帮助我以讨论的格式生成N个SKU和M个年的月度索引。

TIA

Answer 1

尚不清楚您要如何处理每个SKU的不同年份，但这可能会有所帮助。

假定df包含您通过calc_monthly_all(df_data)调用显示的内容。

# The data frame is about to be transposed (rows become columns and
# columns become rows), so label every row with what should end up as
# its column name: "<sku>_<Year>".
rownames(df) <- paste0(df$sku, '_', df$Year)

# Keep only the monthly columns. The yearly average is not part of the
# result, and leaving sku / Year in place would make the transpose coerce
# every value to character.
df <- df[, setdiff(names(df), c('Yearly_Avg', 'sku', 'Year')), drop = FALSE]

# Transpose: one row per month, one column per sku/year combination
df2 <- t(df)

# Strip the leading "Month_" from the row names
rownames(df2) <- sub('^Month_', '', rownames(df2))

# Show the result
df2

编辑

根据您的注释和以前的代码，该函数将如下所示：

library(dplyr)

#' Compute monthly indices and return them in transposed (wide) form.
#'
#' Each `Month_*` value is divided by the mean of that row's `Month_*`
#' values; the result is transposed so that months become rows and every
#' SKU/year combination becomes a column.
#'
#' @param X A data frame with `sku` and `Year` columns and one numeric
#'   column per month whose name starts with `Month_`. The `sku`/`Year`
#'   pairs must be unique because they become the column names.
#' @return A numeric matrix with one row per month (the `Month_` prefix is
#'   stripped from the row names) and one column per row of `X`, named
#'   `<sku>_<Year>`.
calc_monthly_all <- function(X) {
  X$sku <- as.character(X$sku)

  # Select month columns by name rather than position, so extra columns
  # cannot leak into the average (case-insensitive, like starts_with()).
  month_cols <- grep("^Month_", names(X), value = TRUE, ignore.case = TRUE)
  yearly_avg <- rowMeans(X[month_cols])

  # Base-R replacement for the deprecated mutate_at(..., funs(. / Yearly_Avg)).
  for (col in month_cols) {
    X[[col]] <- X[[col]] / yearly_avg
  }

  # The row names become the column names after the transpose.
  rownames(X) <- paste0(X$sku, "_", X$Year)

  # Drop the identifier columns; otherwise t() would coerce every value
  # to character.
  X$sku <- NULL
  X$Year <- NULL

  result <- t(X)
  rownames(result) <- gsub("^Month_", "", rownames(result))
  result
}

编辑2 如果只需要某一年的数据，则可以在函数中进行转置之前先按年份过滤数据：

library(dplyr)

#' Compute the monthly indices of every SKU for a single year.
#'
#' Rows of `X` are restricted to `required_year`, each `Month_*` value is
#' divided by the mean of that row's `Month_*` values, and the result is
#' transposed so that months become rows and SKUs become columns.
#'
#' @param X A data frame with `sku` and `Year` columns and one numeric
#'   column per month whose name starts with `Month_`. A SKU may appear at
#'   most once per year because the SKUs become the column names.
#' @param required_year The year to keep (compared with `==` against
#'   `X$Year`; rows with a missing `Year` are dropped).
#' @return A numeric matrix with one row per month (the `Month_` prefix is
#'   stripped from the row names) and one column per SKU present in
#'   `required_year`; it has zero columns when no row matches.
calc_monthly_one_year <- function(X, required_year) {
  # which() drops NA comparisons, matching dplyr::filter() semantics.
  X <- X[which(X$Year == required_year), , drop = FALSE]

  X$sku <- as.character(X$sku)

  # Select month columns by name rather than position, so extra columns
  # cannot leak into the average (case-insensitive, like starts_with()).
  month_cols <- grep("^Month_", names(X), value = TRUE, ignore.case = TRUE)
  yearly_avg <- rowMeans(X[month_cols])

  # Base-R replacement for the deprecated mutate_at(..., funs(. / Yearly_Avg)).
  for (col in month_cols) {
    X[[col]] <- X[[col]] / yearly_avg
  }

  # The row names become the column names after the transpose.
  rownames(X) <- X$sku

  # Drop the identifier columns; otherwise t() would coerce every value
  # to character.
  X$sku <- NULL
  X$Year <- NULL

  result <- t(X)
  rownames(result) <- gsub("^Month_", "", rownames(result))
  result
}

# Example: monthly indices of every SKU for 2016 only
# (`df_data` is the raw sales data frame read in earlier).
calc_monthly_one_year(df_data, 2016)

循环计算特定年份的月度指数

1 个答案: