使用 的> params< - pnbd.EstimateParameters(cal.cbs)
从BTYD软件包我得到以下错误:
" optim(logparams,pnbd.eLL,cal.cbs = cal.cbs,max.param.value = max.param.value,L-BFGS-B需要有限值&#39 ; FN '"
这是什么意思?这个错误的原因是什么?我的cbs(客户通过足够的统计数据)矩阵是21394 3大,有所需的colums:x,t.x,T.cal。
信息cbs:
答案 0 :(得分:3)
我花了一些时间来阅读和更改源代码,最后在计算对数似然和修复期间找出计算错误。您可以运行下面的代码并调用pnbd.EstimateParameters.ori()函数再次尝试。它适用于我的情况(我有完全相同的错误)。
pnbd.cbs.LL.ori =
function (params, cal.cbs)
{
dc.check.model.params(c("r", "alpha", "s", "beta"), params,
"pnbd.cbs.LL")
tryCatch(x <- cal.cbs[, "x"], error = function(e) stop("Error in pnbd.cbs.LL: cal.cbs must have a frequency column labelled \"x\""))
tryCatch(t.x <- cal.cbs[, "t.x"], error = function(e) stop("Error in pnbd.cbs.LL: cal.cbs must have a recency column labelled \"t.x\""))
tryCatch(T.cal <- cal.cbs[, "T.cal"], error = function(e) stop("Error in pnbd.cbs.LL: cal.cbs must have a column for length of time observed labelled \"T.cal\""))
if ("custs" %in% colnames(cal.cbs)) {
custs <- cal.cbs[, "custs"]
}
else {
custs <- rep(1, length(x))
}
return(sum(custs * pnbd.LL.ori(params, x, t.x, T.cal)))## changed
}
pnbd.LL.ori =
function (params, x, t.x, T.cal)
{
max.length <- max(length(x), length(t.x), length(T.cal))
if (max.length%%length(x))
warning("Maximum vector length not a multiple of the length of x")
if (max.length%%length(t.x))
warning("Maximum vector length not a multiple of the length of t.x")
if (max.length%%length(T.cal))
warning("Maximum vector length not a multiple of the length of T.cal")
dc.check.model.params(c("r", "alpha", "s", "beta"), params,
"pnbd.LL")
if (any(x < 0) || !is.numeric(x))
stop("x must be numeric and may not contain negative numbers.")
if (any(t.x < 0) || !is.numeric(t.x))
stop("t.x must be numeric and may not contain negative numbers.")
if (any(T.cal < 0) || !is.numeric(T.cal))
stop("T.cal must be numeric and may not contain negative numbers.")
x <- rep(x, length.out = max.length)
t.x <- rep(t.x, length.out = max.length)
T.cal <- rep(T.cal, length.out = max.length)
r <- params[1]
alpha <- params[2]
s <- params[3]
beta <- params[4]
maxab <- max(alpha, beta)
absab <- abs(alpha - beta)
param2 <- s + 1
if (alpha < beta) {
param2 <- r + x
}
part1 <- r * log(alpha) + s * log(beta) - lgamma(r) + lgamma(r +
x)
part2 <- -(r + x) * log(alpha + T.cal) - s * log(beta + T.cal)
if (absab == 0) {
F1 <- -(r + s + x) * log(maxab + t.x)
F2 <- -(r + s + x) * log(maxab + T.cal)
partF <- subLogs.ori(F1, F2)## changed
}
else {
F1 <- hyperg_2F1(r + s + x, param2, r + s + x + 1, absab/(maxab +
t.x))/((maxab + t.x)^(r + s + x))
F2 <- hyperg_2F1(r + s + x, param2, r + s + x + 1, absab/(maxab +
T.cal))/((maxab + T.cal)^(r + s + x))
partF <- log(F1 - F2)
}
part3 <- log(s) - log(r + s + x) + partF
## modified
result = part1+ part2+ log(1 + exp(part3 - part2))
return(result)
}
subLogs.ori =
function (loga, logb)
{
## this function is modified
myvec = loga - logb
sel = myvec <30
result = rep(0,length(myvec))
result[sel] = logb[sel] + log(exp(loga[sel] - logb[sel]) - 1)
result[!sel] = loga[!sel]
return(result)
}
pnbd.EstimateParameters.ori = function (cal.cbs, par.start = c(1, 1, 1, 1), max.param.value = 10000)
{
dc.check.model.params(c("r", "alpha", "s", "beta"), par.start,
"pnbd.EstimateParameters")
pnbd.eLL <- function(params, cal.cbs, max.param.value) {
params <- exp(params)
params[params > max.param.value] <- max.param.value
return(-1 * pnbd.cbs.LL.ori(params, cal.cbs))## changed
}
logparams <- log(par.start)
results <- optim(logparams, pnbd.eLL, cal.cbs = cal.cbs,
max.param.value = max.param.value, method = "L-BFGS-B")
estimated.params <- exp(results$par)
estimated.params[estimated.params > max.param.value] <- max.param.value
return(estimated.params)
}
params <- pnbd.EstimateParameters.ori(cal.cbs)
答案 1 :(得分:0)
你可以试试这个:
R> cal.cbs1 = subset(cal.cbs, x<100)
R> params <- pnbd.EstimateParameters(cal.cbs1)
据推测,没有客户购买(x)的次数超过他/她观察到的天数(T.cal),因为该算法假设每个用户每天最多只能购买一次。如果一个用户进行了两次以上的购买,dc.MergeTransactionsOnSameDate函数应该将它们合并为每天一次购买。所以先试试小x。
另外我确实认为pnbd.EstimateParaters()函数存在一些计算错误,因为它在R中调用了optim()函数。您看到的错误消息来自optim()函数。
答案 2 :(得分:0)
对于大型数据集,该函数失败。因此,有一个称为BTYD2的BTYD软件包的修补版本。它不在CRAN上,在R中使用之前,您需要先构建软件包。
https://github.com/ghuiber/BTYD2
使用BTYD软件包的修补版本解决了我的问题。
答案 3 :(得分:0)
首先,更改为代码源:
trace("bgnbd.EstimateParameters",edit=TRUE)
第二,更改方法:
在以下代码中:
function (cal.cbs, par.start = c(1, 3, 1, 3), max.param.value = 10000)
{
dc.check.model.params(c("r", "alpha", "a", "b"), par.start,
"bgnbd.EstimateParameters")
bgnbd.eLL <- function(params, cal.cbs, max.param.value) {
params <- exp(params)
params[params > max.param.value] = max.param.value
return(-1 * bgnbd.cbs.LL(params, cal.cbs))
}
logparams = log(par.start)
results = optim(logparams, bgnbd.eLL, cal.cbs = cal.cbs,
max.param.value = max.param.value, method = "BFGS") # j ai changé cela (I change the methode )
estimated.params <- exp(results$par)
estimated.params[estimated.params > max.param.value] <- max.param.value
return(estimated.params)
}