在R中计算NTILE用于RFM分析

时间:2016-04-27 20:56:16

标签: r dplyr

我正在尝试创建一个基于新近度,频率和货币计算10个百分点的数据框。我已经完成了大部分设置,但是当我要求10时,我无法弄清楚为什么我的代码返回三个NTILES。我现在处于静止状态。下一步将计算每个ntile中客户的百分比。

这是我的代码:

rm(list = ls())
setwd("/Users/a76475/Documents/Customer_Analytics")
rfm<-read.csv("cdnow_students_transaction.csv")

#Set up test and calibration samples   
rfm$DATE <- as.Date(rfm$DATE, format = "%m/%d/%y")
calib <- subset(rfm, rfm$DATE<"1997-09-29")
valid <- subset(rfm, rfm$DATE>"1997-09-30")

#Aggregate for frequency, monetary, and recency -- Calibration
recency<- aggregate(DATE ~ ID, data =calib, FUN = max)
colnames(recency) <- c("ID","Recency")
frequency <- aggregate(DOLLARS ~ ID, data =calib, FUN = length)
colnames(frequency) <- c("ID","Frequency")
monetary <- aggregate(DOLLARS ~ ID, data =calib, FUN = mean)
colnames(frequency) <- c("ID","Monetary")
calib <- merge(frequency, monetary, by = "ID")
calib <- merge(calib, recency, by = "ID")

#Aggregate for frequency, monetary, and recency -- Validation
recency<- aggregate(DATE ~ ID, data =valid, FUN = max)
colnames(recency) <- c("ID","Recency") 
frequency <- aggregate(DOLLARS ~ ID, data =valid, FUN = length)
colnames(frequency) <- c("ID","Frequency")
monetary <- aggregate(DOLLARS ~ ID, data =valid, FUN = mean)
colnames(frequency) <- c("ID","Monetary")
valid <- merge(frequency, monetary, by = "ID")
valid <- merge(valid, recency, by = "ID")

colnames(valid) <- c("ID","FREQ","MONETARY","RECENCY")
colnames(calib) <- c("ID","FREQ","MONETARY","RECENCY")
calib$RECENCY <- NULL

#Create recency score
#For validation
for (i in 1:nrow(valid)) {
  valid$RECENCY1[i] = as.numeric(max(valid$RECENCY) - valid$RECENCY[i])
}
valid$RECENCY <- valid$RECENCY1
valid$RECENCY1 <- NULL

#For calibration
for (i in 1:nrow(calib)) {
  calib$RECENCY1[i] = as.numeric(max(valid$RECENCY) - calib$RECENCY[i])
}
calib$RECENCY <- calib$RECENCY1
calib$RECENCY1 <- NULL

#Merge datasets
rfm <- merge(calib,valid, by="ID", all.x = TRUE)

#Create Column for retention%
require(dplyr)
rfm$monetary.ntile <- ntile(rfm$MONETARY.y,10)
rfm$freq.ntile <- ntile(rfm$FREQ.y,10)
rfm$recency.ntile <- ntile(rfm$RECENCY,10)

1 个答案:

答案 0 :(得分:0)

例如,如果您想要10个桶用于新近度,频率和货币比率:

set.seed(1)
n <- 100
df <- data.frame(
  R = runif(n, 1, 365),
  F = runif(n, 1, 5),
  M = runif(n, 0, 100)
)
apply(df, 2, function(col) {
  breaks <- quantile(col, probs=seq(0, 1, length.out = 10))
  findInterval(col, breaks)
})