用数字替换用户名并计算订单数

时间:2016-07-23 06:18:38

标签: r

数据:

# Sample input: one row per ordered item (10 items, 6 customers).
# CustomerName, OrderID and ArticleDescription are factors; their levels are
# spelled out explicitly so the level order never depends on locale sorting.
DB <- data.frame(
  orderItemID = 1:10,
  CustomerName = factor(
    c("Alex", "Alex", "Bert", "Corel", "Corel",
      "Dennis", "Dennis", "Dennis", "Edgar", "Fred"),
    levels = c("Alex", "Bert", "Corel", "Dennis", "Edgar", "Fred")
  ),
  # Order ids are stored as text, hence the string-sorted level order.
  OrderID = factor(
    c("58", "62", "14", "17", "17", "9", "89", "89", "56", "33"),
    levels = c("14", "17", "33", "56", "58", "62", "89", "9")
  ),
  ArticleDescription = factor(
    c("Wrangler Jeans", "Lee T-Shirt", "Adidas Jacket", "Nike Shoes",
      "Puma Backpack", "Aesics Shoes", "Boss Jeans", "Adidas Shoes",
      "Puma Socks", "Nike Airs"),
    levels = c("Adidas Jacket", "Adidas Shoes", "Aesics Shoes", "Boss Jeans",
               "Lee T-Shirt", "Nike Airs", "Nike Shoes", "Puma Backpack",
               "Puma Socks", "Wrangler Jeans")
  )
)

预期结果:

# Expected result: CustomerName replaced by a per-customer number (Name) and
# OrderID replaced by the per-customer order number (NumberOfOrders).
# Both replacement columns are factors with character levels.
output <- data.frame(
  orderItemID = 1:10,
  Name = factor(
    c("1", "1", "2", "3", "3", "4", "4", "4", "5", "6"),
    levels = c("1", "2", "3", "4", "5", "6")
  ),
  NumberOfOrders = factor(
    c("1", "2", "1", "1", "1", "1", "2", "2", "1", "1"),
    levels = c("1", "2")
  ),
  ArticleDescription = factor(
    c("Wrangler Jeans", "Lee T-Shirt", "Adidas Jacket", "Nike Shoes",
      "Puma Backpack", "Aesics Shoes", "Boss Jeans", "Adidas Shoes",
      "Puma Socks", "Nike Airs"),
    levels = c("Adidas Jacket", "Adidas Shoes", "Aesics Shoes", "Boss Jeans",
               "Lee T-Shirt", "Nike Airs", "Nike Shoes", "Puma Backpack",
               "Puma Socks", "Wrangler Jeans")
  )
)

早上好!

这次我需要将 CustomerName 替换为从 1 开始的数字:相同的名称应该对应相同的数字,下一个名称对应下一个更大的数字。此外,OrderID 应该替换为该客户的订单编号(即这是该客户的第几个订单)——当不同商品的 OrderID 相同时,它们属于同一个订单。例如,Alex 下了 2 个订单(第一个订单订购了“Wrangler Jeans”,第二个订单订购了“Lee T-Shirt”);Dennis 也下了 2 个订单(第一个订单订购了“Aesics Shoes”,第二个订单订购了“Boss Jeans”和“Adidas Shoes”)。最后,我希望 ArticleDescription 保持不变。

3 个答案:

答案 0 :(得分:1)

使用dplyr的一种方式,

library(dplyr)

# Replace CustomerName by a running customer number and OrderID by a running
# per-customer order number.
# match(x, unique(x)) numbers values by order of first appearance, so:
#   * customers are numbered 1, 2, 3, ... without gaps, even if CustomerName
#     is a factor with unused or differently ordered levels;
#   * rows sharing an OrderID get the same order number even when they are
#     not adjacent (a run-length id would start a new number for them).
DB %>%
  mutate(Name = match(CustomerName, unique(CustomerName))) %>%
  group_by(Name) %>%
  mutate(No.of.Orders = match(OrderID, unique(OrderID))) %>%
  # Drop the grouping so later verbs are not silently applied per customer.
  ungroup() %>%
  select(-c(CustomerName, OrderID))

#Source: local data frame [10 x 4]

#   orderItemID ArticleDescription  Name No.of.Orders
#         (int)             (fctr) (int)        (int)
#1            1     Wrangler Jeans     1            1
#2            2        Lee T-Shirt     1            2
#3            3      Adidas Jacket     2            1
#4            4         Nike Shoes     3            1
#5            5      Puma Backpack     3            1
#6            6       Aesics Shoes     4            1
#7            7         Boss Jeans     4            2
#8            8       Adidas Shoes     4            2
#9            9         Puma Socks     5            1
#10          10          Nike Airs     6            1

答案 1 :(得分:0)

您可以用下面的代码轻松得到每个名称对应的数字:

# Rows per customer, in factor-level order. Note that this counts order
# *items* (rows), not distinct orders.
number_of_orders <- table(DB$CustomerName)
# Customer number = rank of the name's first appearance in the data.
# Unlike rep(<ids>, <counts>), this does not require the rows to be sorted by
# customer, and it does not break when the factor has unused levels.
name <- match(DB$CustomerName, unique(DB$CustomerName))

但我认为亚历克斯的建议更好。

答案 2 :(得分:0)

library(dplyr)

# Name: dense rank of CustomerName (1, 2, 3, ... without gaps).
# No.of.Orders: running count of order changes within each customer.
# The lag() comparison is done *after* group_by() so the first row of a
# customer is never compared with the previous customer's last row; inside a
# group the first row has no predecessor (lag() is NA) and starts order 1.
DB %>%
  mutate(Name = dense_rank(CustomerName)) %>%
  group_by(CustomerName) %>%
  mutate(
    No.of.Orders = cumsum(is.na(lag(OrderID)) | OrderID != lag(OrderID))
  ) %>%
  # Drop the grouping so later verbs are not silently applied per customer.
  ungroup()