数据:
# Sample input: one row per ordered item. CustomerName, OrderID and
# ArticleDescription are factors; their levels are spelled out explicitly so
# the level order does not depend on the locale's sort order.
DB <- data.frame(
  orderItemID = 1:10,
  CustomerName = factor(
    c("Alex", "Alex", "Bert", "Corel", "Corel",
      "Dennis", "Dennis", "Dennis", "Edgar", "Fred"),
    levels = c("Alex", "Bert", "Corel", "Dennis", "Edgar", "Fred")
  ),
  OrderID = factor(
    c("58", "62", "14", "17", "17", "9", "89", "89", "56", "33"),
    levels = c("14", "17", "33", "56", "58", "62", "89", "9")
  ),
  ArticleDescription = factor(
    c("Wrangler Jeans", "Lee T-Shirt", "Adidas Jacket", "Nike Shoes",
      "Puma Backpack", "Aesics Shoes", "Boss Jeans", "Adidas Shoes",
      "Puma Socks", "Nike Airs"),
    levels = c("Adidas Jacket", "Adidas Shoes", "Aesics Shoes", "Boss Jeans",
               "Lee T-Shirt", "Nike Airs", "Nike Shoes", "Puma Backpack",
               "Puma Socks", "Wrangler Jeans")
  )
)
预期结果:
# Expected result: Name is the customer id and NumberOfOrders the per-customer
# order counter, both stored as factors; ArticleDescription is unchanged.
output <- data.frame(
  orderItemID = 1:10,
  Name = factor(
    c("1", "1", "2", "3", "3", "4", "4", "4", "5", "6"),
    levels = c("1", "2", "3", "4", "5", "6")
  ),
  NumberOfOrders = factor(
    c("1", "2", "1", "1", "1", "1", "2", "2", "1", "1"),
    levels = c("1", "2")
  ),
  ArticleDescription = factor(
    c("Wrangler Jeans", "Lee T-Shirt", "Adidas Jacket", "Nike Shoes",
      "Puma Backpack", "Aesics Shoes", "Boss Jeans", "Adidas Shoes",
      "Puma Socks", "Nike Airs"),
    levels = c("Adidas Jacket", "Adidas Shoes", "Aesics Shoes", "Boss Jeans",
               "Lee T-Shirt", "Nike Airs", "Nike Shoes", "Puma Backpack",
               "Puma Socks", "Wrangler Jeans")
  )
)
早上好!
这次我需要将CustomerName
替换为从1开始的数字——相同的名称应该对应相同的数字,下一个名称对应下一个更大的数字。此外,OrderID
应该替换为该客户所下订单的序号(即该客户的第几个订单)——当不同商品的OrderID相同时,它们属于同一个订单(例如,Alex下了2个订单:第一个订单订购了“Wrangler Jeans”,第二个订单订购了“Lee T-Shirt”;Dennis也下了2个订单:第一个订单订购了“Aesics Shoes”,第二个订单订购了“Boss Jeans”和“Adidas Shoes”)。最后,我想保持ArticleDescription
不受影响。
答案 0 :(得分:1)
使用dplyr
的一种方式,
library(dplyr)

# Replace CustomerName by an integer id (Name) and OrderID by a per-customer
# order counter (No.of.Orders); orderItemID and ArticleDescription are kept.
DB %>%
  # Integer code of the customer's factor level, so ids follow the level order
  # of CustomerName (alphabetical for this data).
  mutate(Name = as.integer(as.factor(CustomerName))) %>%
  group_by(Name) %>%
  # Number each customer's orders by first appearance of the OrderID. Unlike a
  # run-length id, rows of the same order share one number even when they are
  # not adjacent, and no extra package is needed.
  mutate(No.of.Orders = match(OrderID, unique(OrderID))) %>%
  # Drop the grouping so later steps do not silently operate per customer.
  ungroup() %>%
  select(-c(CustomerName, OrderID))
#Source: local data frame [10 x 4]
# orderItemID ArticleDescription Name No.of.Orders
# (int) (fctr) (int) (int)
#1 1 Wrangler Jeans 1 1
#2 2 Lee T-Shirt 1 2
#3 3 Adidas Jacket 2 1
#4 4 Nike Shoes 3 1
#5 5 Puma Backpack 3 1
#6 6 Aesics Shoes 4 1
#7 7 Boss Jeans 4 2
#8 8 Adidas Shoes 4 2
#9 9 Puma Socks 5 1
#10 10 Nike Airs 6 1
答案 1 :(得分:0)
您可以轻松获取名称
# Number of rows per customer. DB has one row per ordered item, so this counts
# items rather than distinct orders.
number_of_orders <- table(DB$CustomerName)
# Integer id per customer, assigned in order of first appearance. Matching
# against the unique names keeps every id aligned with its row even when the
# rows are not sorted by customer or the factor has unused levels.
name <- match(DB$CustomerName, unique(DB$CustomerName))
但我认为亚历克斯的建议更好。
答案 2 :(得分:0)
library(dplyr)

# Add a customer id (Name) and a per-customer running order counter
# (No.of.Orders) to DB; all original columns are kept.
DB %>%
  mutate(Name = dense_rank(CustomerName)) %>%
  # Group before comparing neighbouring rows so that a customer's first row is
  # never compared with the previous customer's last OrderID.
  group_by(CustomerName) %>%
  mutate(
    # A new order starts on the customer's first row or whenever the OrderID
    # differs from the previous row; the running sum of those starts is the
    # order counter.
    No.of.Orders = cumsum(row_number() == 1L | OrderID != lag(OrderID))
  ) %>%
  # Return an ungrouped result so later steps do not operate per customer.
  ungroup()