使用dplyr根据已有的0/1条件为新列赋值

时间:2017-12-14 08:59:04

标签: r dplyr

我有一个包含各品牌每周销售数据的数据集,我想过滤掉出现过零销售额的品牌。我用ZeroSales列来标记这些品牌(销售额为0的周记为1),数据如下:

# Sample data from the question, as printed by dput(): 11 weekly rows
# (Week 7 to 17) for Brand "3" in Category "2". ZeroSales is 1 in the weeks
# where Sales is 0 and 0 otherwise. The value is not assigned to a name here.
structure(list(Week = 7:17, Category = c("2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2"), Brand = c("3", "3", "3", 
"3", "3", "3", "3", "3", "3", "3", "3"), Display = c(0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0), Sales = c(0, 0, 0, 0, 13.440948, 40.097397, 
32.01384, 382.169189, 2830.748779, 4524.460938, 1053.590576), 
    Price = c(0, 0, 0, 0, 5.949999, 5.95, 5.950003, 4.87759, 
    3.787015, 3.205987, 4.898724), Distribution = c(0, 0, 0, 
    0, 1.394019, 1.386989, 1.621416, 8.209759, 8.552915, 9.692097, 
    9.445554), Advertising = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0), lnSales = c(11.4945151554497, 11.633214247508, 11.5862944141137, 
    11.5412559646132, 11.4811122484454, 11.4775106999991, 11.6333660772506, 
    11.4859819773102, 11.5232680456161, 11.5572670584292, 11.5303686934256
    ), IntrayearCycles = c(4.15446534315765, 3.62757053512638, 
    2.92387946552647, 2.14946414386239, 1.40455011205262, 0.768856938870769, 
    0.291497141953598, -0.0131078404184544, -0.162984144025091, 
    -0.200882782749248, -0.182877633924882), `Competitor Advertising` = c(10584.87063, 
    224846.3243, 90657.72553, 0, 0, 0, 2396.54212, 0, 0, 0, 40343.49444
    ), `Competitor Display` = c(0.385629, 2.108133, 2.515806, 
    4.918288, 3.81749, 3.035847, 2.463194, 3.242594, 1.850399, 
    1.751096, 1.337943), `Competitor Prices` = c(5.30989, 5.372752, 
    5.3717245, 5.3295525, 5.298393, 5.319466, 5.1958415, 5.2941095, 
    5.296757, 5.294059, 5.273578), ZeroSales = c(1, 1, 1, 1, 
    0, 0, 0, 0, 0, 0, 0)), .Names = c("Week", "Category", "Brand", 
"Display", "Sales", "Price", "Distribution", "Advertising", "lnSales", 
"IntrayearCycles", "Competitor Advertising", "Competitor Display", 
"Competitor Prices", "ZeroSales"), row.names = 1255:1265, class = "data.frame")

目前,该列只在品牌销售额为零的那几周被赋值为1,而这些品牌只是在部分周内销售额为零(完整范围是第1周到第208周)。我不仅想删除这几周,还想删除整个品牌。因此我需要找到一条命令,让出现过零销售额的品牌的其余各周也都变为1。附上一个可用的示例!

<file-upload v-model="files"></file-upload>
<button type="submit" v-on:click.prevent="Submit">Submit</button>

<script>
// Upload form: on Submit, POSTs each selected file to /photos/create/.
// NOTE(review): `api`, `this.Authuser` and the `$store` token are not defined
// in this snippet; they must be provided by the surrounding app.
export default {
  data: function () {
    return {
      // Bound to <file-upload v-model="files">.
      files: [],
      // Request options passed as the third argument of api.post().
      // NOTE(review): 'Content-Type' sits next to `headers`, not inside it;
      // if `api` is an axios instance it likely belongs inside `headers` - confirm.
      config: {
        'headers': {'Authorization': 'JWT ' + this.$store.state.token},
        'Content-Type': 'multipart/form-data'
      }
    }
  },
  methods: {
    // Click handler referenced by the button's v-on:click.prevent="Submit".
    Submit: function () {
      // Iterate over the selected files themselves (for...in yields keys, and
      // the original read the undefined `this.file` instead of `this.files`).
      for (const file of this.files) {
        // NOTE(review): if <file-upload> yields wrapper objects rather than
        // File/Blob instances, append the wrapped file instead - confirm.
        const data = new FormData()
        data.append('image', file)
        data.append('caption', 'image')
        data.append('user', this.Authuser)
        api.post('/photos/create/', data, this.config)
      }
    }
  }
}
</script>

1 个答案:

答案 0 :(得分:0)

注意:我稍微修改了您的示例数据,以涵盖所有可能的情况(请使用下方的dput输出)。

在以下示例数据(即df)中,您可以观察到品牌ID

  • 3在几周内销售额为零,
  • 4在所有可用周内的销售额为零,
  • 5在所有可用周内都有“非零”销售。

根据您的用例,最终输出中应当只保留品牌ID 5。

library(dplyr)

# Keep only the brands that never have a zero-sales week.
# `Brand[Sales == 0]` is evaluated inside filter()'s data mask, so it is the
# plain vector of brands owning at least one zero-sales row. Unlike
# `.[Sales == 0, "Brand"]`, this stays a vector when `df` is a tibble
# (tibble `[` returns a one-column tibble, which breaks `%in%`).
df_new <- df %>%
  filter(!(Brand %in% Brand[Sales == 0]))
df_new

输出是:

  Week Category Brand Display    Sales    Price Distribution Advertising  lnSales IntrayearCycles
1   16        2     5       0 4524.461 3.205987     9.692097           0 11.55727      -0.2008828
2   17        2     5       0 1053.591 4.898724     9.445554           0 11.53037      -0.1828776
  Competitor Advertising Competitor Display Competitor Prices
1                   0.00           1.751096          5.294059
2               40343.49           1.337943          5.273578


# Sample data (dput() output), assigned to `df` so it can be pasted and run.
# Brand "3" has zero sales in some weeks, brand "4" in all of its weeks,
# and brand "5" in none.
df <- structure(list(Week = 7:17, Category = c("2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2"), Brand = c("3", "3", "3", 
"3", "3", "3", "4", "4", "4", "5", "5"), Display = c(0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0), Sales = c(0, 0, 0, 0, 13.440948, 40.097397, 
0, 0, 0, 4524.460938, 1053.590576), Price = c(0, 0, 0, 0, 5.949999, 
5.95, 5.950003, 4.87759, 3.787015, 3.205987, 4.898724), Distribution = c(0, 
0, 0, 0, 1.394019, 1.386989, 1.621416, 8.209759, 8.552915, 9.692097, 
9.445554), Advertising = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
    lnSales = c(11.4945151554497, 11.633214247508, 11.5862944141137, 
    11.5412559646132, 11.4811122484454, 11.4775106999991, 11.6333660772506, 
    11.4859819773102, 11.5232680456161, 11.5572670584292, 11.5303686934256
    ), IntrayearCycles = c(4.15446534315765, 3.62757053512638, 
    2.92387946552647, 2.14946414386239, 1.40455011205262, 0.768856938870769, 
    0.291497141953598, -0.0131078404184544, -0.162984144025091, 
    -0.200882782749248, -0.182877633924882), `Competitor Advertising` = c(10584.87063, 
    224846.3243, 90657.72553, 0, 0, 0, 2396.54212, 0, 0, 0, 40343.49444
    ), `Competitor Display` = c(0.385629, 2.108133, 2.515806, 
    4.918288, 3.81749, 3.035847, 2.463194, 3.242594, 1.850399, 
    1.751096, 1.337943), `Competitor Prices` = c(5.30989, 5.372752, 
    5.3717245, 5.3295525, 5.298393, 5.319466, 5.1958415, 5.2941095, 
    5.296757, 5.294059, 5.273578)), .Names = c("Week", "Category", 
"Brand", "Display", "Sales", "Price", "Distribution", "Advertising", 
"lnSales", "IntrayearCycles", "Competitor Advertising", "Competitor Display", 
"Competitor Prices"), class = "data.frame", row.names = 1255:1265)

希望这有帮助!