我刚接触R,也是第一次在这里提问。
下面是源代码。但结果不正确:99.9%的客户的频率(F)和消费金额(M)排名都是1,而新近度(R)的排名没有一个是5。有人可以帮忙吗?非常感谢!!
# Switch to the RFM project folder, then load the raw sales records
# (comma-separated text, first row holds the column names).
setwd("\\Users\\stang\\Documents\\R\\RFM")
bmdata <- read.csv(
  file = "Customer sales 103116-103117.txt",
  header = TRUE,
  sep = ","
)
# Inspect the size and column types of the loaded data. The console output
# is kept as comments so that these lines remain valid R.
dim(bmdata)
# [1] 1094964 3
str(bmdata)
# 'data.frame': 1094964 obs. of 3 variables:
#  $ customer_ID: num 1 1 1 1 1 1 1 1 1 1 ...
#  $ sales_date : Factor w/ 366 levels "1/1/2017 0:00:00",..: 86 66 81 82 84 85 105 116 122 124 ...
#  $ sales      : num 182 120 91 63 58 56 251 24 269 113 ...
创建新近度
# sales_date is read in as text/Factor such as "1/1/2017 0:00:00"
# (month/day/year); difftime() cannot be relied on to guess that layout,
# so convert it to Date explicitly. The trailing time part is ignored.
if (!inherits(bmdata$sales_date, "Date")) {
  bmdata$sales_date <- as.Date(
    as.character(bmdata$sales_date),
    format = "%m/%d/%Y"
  )
}
# Recency: whole days elapsed between today and each sale.
bmdata$Recency <- round(
  as.numeric(difftime(Sys.Date(), bmdata$sales_date, units = "days"))
)
head(bmdata)
customer_ID sales_date sales Recency
1 1 2016-11-03 182 368
2 1 2016-11-11 120 360
3 1 2016-11-25 91 346
4 1 2016-11-26 63 345
5 1 2016-11-28 58 343
6 1 2016-11-29 56 342
计算新近度(R)、频率(F)和消费金额(M)
# Recency per customer: the smallest value in column 4 (days since sale)
# among that customer's rows, i.e. the most recent purchase.
bmdataR <- setNames(
  aggregate(x = bmdata[[4]], by = list(bmdata$customer_ID), FUN = min),
  c("customer_ID", "Recency")
)
head(bmdataR)
customer_ID Recency
1 1 10
2 2 13
3 3 12
4 6 37
5 7 25
6 9 7
# Frequency per customer: number of rows (sales records) for each ID,
# counted over column 2.
bmdataF <- setNames(
  aggregate(x = bmdata[[2]], by = list(bmdata$customer_ID), FUN = length),
  c("customer_ID", "Frequency")
)
head(bmdataF)
customer_ID Frequency
1 1 52
2 2 39
3 3 117
4 6 47
5 7 52
6 9 33
# Monetary per customer: total of column 3 (sales) for each ID.
bmdataM <- setNames(
  aggregate(x = bmdata[[3]], by = list(bmdata$customer_ID), FUN = sum),
  c("customer_ID", "Monetary")
)
head(bmdataM)
customer_ID Monetary
1 1 6432.75
2 2 3005.60
3 3 27889.52
4 6 5573.05
5 7 6513.20
6 9 2889.40
合并每个唯一客户ID的R、F、M指标
# Combine the three per-customer tables by key. data.frame() would bind them
# side by side, repeating customer_ID (as customer_ID.1, customer_ID.2) and
# silently relying on all three tables sharing the same row order.
bmdataRFM <- Reduce(
  function(left, right) merge(left, right, by = "customer_ID"),
  list(bmdataR, bmdataF, bmdataM)
)
计算每次访问的销售额
# Average spend per visit: total Monetary divided by number of purchases.
bmdataRFM$salespervisit <- with(bmdataRFM, Monetary / Frequency)
R,F,M组合
# Join the summaries on customer_ID: Frequency with Recency first, then
# Monetary onto that intermediate table.
temp <- merge(x = bmdataF, y = bmdataR, by = "customer_ID")
bmdataRFM2 <- merge(x = temp, y = bmdataM, by = "customer_ID")
head(temp)
customer_ID Frequency Recency
1 1 52 10
2 2 39 13
3 3 117 12
4 6 47 37
5 7 52 25
6 9 33 7
# creation of R,F,M rank
# Score each measure 1-5 by quintile. cut(x, 5) splits the *range* of x into
# five equal-width intervals, so with long-tailed data a few large values
# stretch the range and nearly every customer lands in bin 1. Taking the cut
# points from the data's own quantiles avoids that.
# Tied values always share a score, so heavily tied data can leave a score
# unused. Score 1 is the lowest value of each measure (for Recency that is
# the most recent purchase).
quintile_rank <- function(x) {
  cut_points <- quantile(
    x,
    probs = c(0.2, 0.4, 0.6, 0.8),
    na.rm = TRUE,
    names = FALSE
  )
  # left.open = TRUE gives intervals (a, b], so a value equal to a cut point
  # stays in the lower group; NA inputs stay NA.
  findInterval(x, cut_points, left.open = TRUE) + 1L
}
bmdataRFM$rankR <- quintile_rank(bmdataRFM$Recency)
bmdataRFM$rankF <- quintile_rank(bmdataRFM$Frequency)
bmdataRFM$rankM <- quintile_rank(bmdataRFM$Monetary)
# Analysis
# Pack the three scores into one three-digit code: hundreds = R, tens = F,
# units = M, then append it to the table as column groupRFM.
groupRFM <- with(bmdataRFM, 100 * rankR + 10 * rankF + rankM)
bmdataRFM <- cbind(bmdataRFM, groupRFM = groupRFM)
答案 0 :(得分:0)
我运行了您的全部脚本,问题可能出在数据上;为了验证,我创建了一些与您的数据类似的模拟数据:
# Reproducible toy data: 500 sales rows with random customer IDs (drawn with
# replacement), distinct sale dates and random sale amounts.
# The three sample() calls stay in this order so the seeded stream is used
# exactly as before.
set.seed(12)
customer_ID <- sample(seq_len(500), size = 500, replace = TRUE)
sales_date <- sample(
  seq(from = as.Date("2016/01/01"), to = as.Date("2017/12/01"), by = "day"),
  size = 500
)
sales <- sample(20:400, size = 500, replace = TRUE)
bmdata <- data.frame(
  customer_ID = customer_ID,
  sales_date = sales_date,
  sales = sales
)
head(bmdata)
customer_ID sales_date sales
1 35 2016-08-15 43
2 409 2016-09-02 79
3 472 2016-11-20 327
4 135 2016-05-08 191
5 85 2016-12-25 217
6 17 2017-05-22 242
然后,我用您的脚本以同样的方式计算了RFM:
# recency
# Whole days from each sale (column 2) to today, then the minimum per
# customer, i.e. the most recent purchase.
bmdata$Recency <- round(
  as.numeric(difftime(Sys.Date(), bmdata[[2]], units = "days"))
)
bmdataR <- setNames(
  aggregate(x = bmdata[[4]], by = list(bmdata$customer_ID), FUN = min),
  c("customer_ID", "Recency")
)
# frequency
# Number of sales rows recorded for each customer.
bmdataF <- setNames(
  aggregate(x = bmdata[[2]], by = list(bmdata$customer_ID), FUN = length),
  c("customer_ID", "Frequency")
)
# monetary
# Total sales amount (column 3) for each customer.
bmdataM <- setNames(
  aggregate(x = bmdata[[3]], by = list(bmdata$customer_ID), FUN = sum),
  c("customer_ID", "Monetary")
)
# RFM dataframe
# Fold the three summaries together with successive merges on their shared
# column(s): (R merged with F), then merged with M.
bmdataRFM <- Reduce(merge, list(bmdataR, bmdataF, bmdataM))
# Sales per visit is left disabled because it is not used below.
# bmdataRFM$salespervisit <- bmdataRFM$Monetary/bmdataRFM$Frequency
# here the ranks
# Bin each measure into five equal-width intervals over its range and keep
# the integer bin code (1-5) instead of a factor label.
# NOTE(review): equal-width bins concentrate long-tailed measures in bin 1;
# quantile-based breaks would spread customers more evenly.
bmdataRFM$rankR <- cut(bmdataRFM$Recency, breaks = 5, labels = FALSE)
bmdataRFM$rankF <- cut(bmdataRFM$Frequency, breaks = 5, labels = FALSE)
bmdataRFM$rankM <- cut(bmdataRFM$Monetary, breaks = 5, labels = FALSE)
> head(bmdataRFM)
customer_ID Recency Frequency Monetary rankR rankF rankM
1 1 338 1 145 1 1 1
2 2 633 1 268 3 1 1
3 5 573 1 119 3 1 1
4 7 439 1 290 2 1 1
5 8 580 3 835 3 3 3
6 10 344 1 192 1 1 1
而且,如您所见,似乎一切正常。
# Percentage of customers in each Recency rank, to two decimals.
round(100 * prop.table(table(bmdataRFM$rankR)), digits = 2)
1 2 3 4 5
28.00 23.08 19.08 15.69 14.15
# Percentage of customers in each Frequency rank, to two decimals.
round(100 * prop.table(table(bmdataRFM$rankF)), digits = 2)
1 2 3 4 5
61.23 26.46 9.85 2.15 0.31
# Percentage of customers in each Monetary rank, to two decimals.
round(100 * prop.table(table(bmdataRFM$rankM)), digits = 2)
1 2 3 4 5
68.62 26.77 3.69 0.62 0.31
现在,生成分组标签:
# Pack the three ranks into one three-digit label (hundreds = R, tens = F,
# units = M) and append it to the table as column groupRFM.
groupRFM <- with(bmdataRFM, 100 * rankR + 10 * rankF + rankM)
bmdataRFM <- cbind(bmdataRFM, groupRFM = groupRFM)
head(bmdataRFM)
customer_ID Recency Frequency Monetary rankR rankF rankM groupRFM
1 1 338 1 145 1 1 1 111
2 2 633 1 268 3 1 1 311
3 5 573 1 119 3 1 1 311
4 7 439 1 290 2 1 1 211
5 8 580 3 835 3 3 3 333
6 10 344 1 192 1 1 1 111
现在,您可以查看客户在各个RFM分组中的分布情况:
library(ggplot2)
# `analysis` was never created in the script shown. Build it here as the
# number of customers per RFM group: as.data.frame() on a one-way table
# yields the columns Var1 (group label) and Freq (count) that the plot uses.
analysis <- as.data.frame(table(bmdataRFM$groupRFM))
# Horizontal bar chart of group sizes, ordered by count, labelled with the
# count itself.
p <- ggplot(
  data = analysis,
  aes(x = reorder(Var1, Freq), y = Freq, label = Freq)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  geom_text()
p
看来脚本本身运行正常,也许您可以检查一下自己的数据。