使用R进行RFM分析

时间:2018-07-09 14:20:43

标签: r

我刚开始接触R,也是第一次在这里提问。
下面是源代码。但结果不正确:99.9%的客户在频率(F)和货币(M)上的排名都是1,而新近度(R)的排名没有一个是5。有人可以帮忙吗?非常感谢!!

# Switch to the project folder, then read the comma-separated sales export.
setwd('\\Users\\stang\\Documents\\R\\RFM')
bmdata <- read.csv(
  "Customer sales 103116-103117.txt",
  header = TRUE,
  sep = ","
)
dim(bmdata)
[1] 1094964       3

str(bmdata)
'data.frame':   1094964 obs. of  3 variables:
 $ customer_ID: num  1 1 1 1 1 1 1 1 1 1 ...
 $ sales_date : Factor w/ 366 levels "1/1/2017 0:00:00",..: 86 66 81 82 84 85 105 116 122 124 ...
 $ sales      : num  182 120 91 63 58 56 251 24 269 113 ...

创建新近度

# Recency: whole days elapsed between each sale (column 2) and today.
# `units` is spelled out in full; the original `unit=` only worked through
# partial argument matching.
bmdata$Recency <- round(
  as.numeric(difftime(Sys.Date(), bmdata[, 2], units = "days"))
)
head(bmdata)
  customer_ID sales_date sales Recency
1           1 2016-11-03   182     368
2           1 2016-11-11   120     360
3           1 2016-11-25    91     346
4           1 2016-11-26    63     345
5           1 2016-11-28    58     343
6           1 2016-11-29    56     342

计算新近度、频率和货币(消费金额)

# Per-customer recency: the smallest day count in column 4 (Recency),
# i.e. the customer's most recent sale.
bmdataR <- setNames(
  aggregate(bmdata[, 4], by = list(bmdata$customer_ID), FUN = min),
  c("customer_ID", "Recency")
)
head(bmdataR)
  customer_ID Recency
1           1      10
2           2      13
3           3      12
4           6      37
5           7      25
6           9       7

# Per-customer frequency: how many rows (column 2 entries) each customer has.
bmdataF <- setNames(
  aggregate(bmdata[, 2], by = list(bmdata$customer_ID), FUN = length),
  c("customer_ID", "Frequency")
)
head(bmdataF)
  customer_ID Frequency
1           1        52
2           2        39
3           3       117
4           6        47
5           7        52
6           9        33

# Per-customer monetary value: the sum of column 3 (sales).
bmdataM <- setNames(
  aggregate(bmdata[, 3], by = list(bmdata$customer_ID), FUN = sum),
  c("customer_ID", "Monetary")
)
head(bmdataM)
  customer_ID Monetary
1           1  6432.75
2           2  3005.60
3           3 27889.52
4           6  5573.05
5           7  6513.20
6           9  2889.40

按客户ID将R、F、M合并到一起

# Join the three per-customer tables on customer_ID. Column-binding them with
# data.frame() duplicated the key (customer_ID.1, customer_ID.2) and depended
# on all three tables having identical row order.
bmdataRFM <- merge(
  merge(bmdataR, bmdataF, by = "customer_ID"),
  bmdataM,
  by = "customer_ID"
)

计算每次访问的销售额

# Average spend per visit: total sales divided by the number of sales rows.
bmdataRFM$salespervisit <- with(bmdataRFM, Monetary / Frequency)

R,F,M组合

# Join frequency and recency first, then add monetary, all keyed on customer_ID.
temp <- merge(bmdataF, bmdataR, by = "customer_ID")
bmdataRFM2 <- merge(temp, bmdataM, by = "customer_ID")
head(temp)
  customer_ID Frequency Recency
1           1        52      10
2           2        39      13
3           3       117      12
4           6        47      37
5           7        52      25
6           9        33       7
# creation of R,F,M rank
# cut(x, 5) splits the observed range of x into five equal-width intervals;
# labels = FALSE returns the interval index (1-5) instead of a factor.
bmdataRFM$rankR <- cut(bmdataRFM$Recency, 5, labels = FALSE)
bmdataRFM$rankF <- cut(bmdataRFM$Frequency, 5, labels = FALSE)
bmdataRFM$rankM <- cut(bmdataRFM$Monetary, 5, labels = FALSE)
# Analysis
# Combine the three ranks into one three-digit code:
# hundreds = R rank, tens = F rank, ones = M rank.
groupRFM <- bmdataRFM$rankR * 100 + bmdataRFM$rankF * 10 + bmdataRFM$rankM
bmdataRFM <- cbind(bmdataRFM, groupRFM)

1 个答案:

答案 0 :(得分:0)

我运行了您的全部脚本,问题可能出在数据上。为此,我创建了一些与您的数据类似的模拟数据:

# Reproducible fake transactions: 500 rows with a customer id in 1..500,
# a distinct sale date between 2016-01-01 and 2017-12-01, and a sale amount
# in 20..400. `replace` is spelled out instead of the partially matched `rep`.
set.seed(12)
customer_ID <- sample(1:500, 500, replace = TRUE)
sales_date <- sample(
  seq(as.Date("2016/01/01"), as.Date("2017/12/01"), by = "day"),
  500
)
sales <- sample(20:400, 500, replace = TRUE)
bmdata <- data.frame(customer_ID, sales_date, sales)
head(bmdata)
  customer_ID sales_date sales
1          35 2016-08-15    43
2         409 2016-09-02    79
3         472 2016-11-20   327
4         135 2016-05-08   191
5          85 2016-12-25   217
6          17 2017-05-22   242

然后,我按照您的脚本计算了RFM:

# recency: whole days between each sale and today
# (`units` is spelled out; `unit=` only worked through partial matching)
bmdata$Recency <- round(
  as.numeric(difftime(Sys.Date(), bmdata$sales_date, units = "days"))
)

# smallest day count per customer = that customer's most recent sale
bmdataR <- aggregate(bmdata$Recency, list(bmdata$customer_ID), min)
names(bmdataR) <- c("customer_ID", "Recency")

# frequency: number of sales rows per customer
bmdataF <- aggregate(bmdata$sales_date, list(bmdata$customer_ID), length)
names(bmdataF) <- c("customer_ID", "Frequency")

# monetary: total sales per customer
bmdataM <- aggregate(bmdata$sales, list(bmdata$customer_ID), sum)
names(bmdataM) <- c("customer_ID", "Monetary")

# RFM dataframe: join the three tables on their shared customer_ID column
bmdataRFM <- merge(merge(bmdataR, bmdataF), bmdataM)

# this is commented because I've not used it.
# bmdataRFM$salespervisit <- bmdataRFM$Monetary/bmdataRFM$Frequency


# here the ranks
# NOTE: cut(x, 5) builds five equal-WIDTH intervals over the range of x, not
# five equally sized groups, so skewed Frequency/Monetary values pile up in
# rank 1. Quantile-based breaks would be needed for balanced groups.
# Rank 1 holds the smallest values, so for Recency rank 1 = most recent.
bmdataRFM$rankR <- cut(bmdataRFM$Recency, 5, labels = FALSE)
bmdataRFM$rankF <- cut(bmdataRFM$Frequency, 5, labels = FALSE)
bmdataRFM$rankM <- cut(bmdataRFM$Monetary, 5, labels = FALSE)

 > head(bmdataRFM)
  customer_ID Recency Frequency Monetary rankR rankF rankM
1           1     338         1      145     1     1     1
2           2     633         1      268     3     1     1
3           5     573         1      119     3     1     1
4           7     439         1      290     2     1     1
5           8     580         3      835     3     3     3
6          10     344         1      192     1     1     1

而且,如您所见,似乎一切正常。

round(100 * prop.table(table(bmdataRFM$rankR)), digits = 2)
    1     2     3     4     5 
28.00 23.08 19.08 15.69 14.15 

round(100 * prop.table(table(bmdataRFM$rankF)), digits = 2)
    1     2     3     4     5 
61.23 26.46  9.85  2.15  0.31 

round(100 * prop.table(table(bmdataRFM$rankM)), digits = 2)
    1     2     3     4     5 
68.62 26.77  3.69  0.62  0.31 

接下来生成分组标签:

# Three-digit segment code: hundreds = R rank, tens = F rank, ones = M rank.
groupRFM <- 100 * bmdataRFM$rankR + 10 * bmdataRFM$rankF + bmdataRFM$rankM
bmdataRFM <- cbind(bmdataRFM, groupRFM)
head(bmdataRFM)
  customer_ID Recency Frequency Monetary rankR rankF rankM groupRFM
1           1     338         1      145     1     1     1      111
2           2     633         1      268     3     1     1      311
3           5     573         1      119     3     1     1      311
4           7     439         1      290     2     1     1      211
5           8     580         3      835     3     3     3      333
6          10     344         1      192     1     1     1      111

现在,您可以查看客户在各分组中的分布情况:

library(ggplot2)
# `analysis` was never defined in the original snippet. Counting customers per
# RFM group with table() and converting with as.data.frame() yields exactly
# the columns the plot refers to: Var1 (group code) and Freq (count).
analysis <- as.data.frame(table(bmdataRFM$groupRFM))
# Horizontal bar chart, bars ordered by count and labelled with the count.
p <- ggplot(
  data = analysis,
  aes(x = reorder(Var1, Freq), y = Freq, label = Freq)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  geom_text()
p

enter image description here

看起来一切正常,也许您可以检查一下自己的数据。