我正在实现自定义似然函数,并且希望使用maxLik-library。
在全局环境中将参数向量定义为变量时,函数my_function
可以正常工作。 beta <- c(0.12, 0.28)
和bm.loglikFun
函数之外。因为我有两个解释变量/特征,所以有两个系数。但是,当我尝试使用适用于maxLik-library的符号时,我收到一个错误。到目前为止,我的代码如下:
# libraries
library(pracma)
library(maxLik)
library(readxl)
# Bringing in the data
my_data <- read_excel("mock_data.xlsx", sheet = 3)
######### ---------------------------------------------- BEGIN HELPER FUNCTIONS ---------------------------------#####################
# Helper function for drift and diffusion to account for firms who never reach the next stage
exp_decay <- function(v, ν, τ) {
tryCatch(
expr = {
return(ifelse(v < ν, 1, exp(-((v - ν) / τ))))
},
warning = function(w){
return(0)
}
)
}
# Helper function for diffusion
g <- function(z) {
0.5 * (z + sqrt(z^2 + 0.25))
}
# Computing drift
calculate_drift <- function(v_drift, ν_drift, τ_drift, X_drift, β_drift) {
return((dot(X_drift, β_drift)) * exp_decay(v_drift, ν_drift, τ_drift))
}
# Computing diffusion
calculate_diffusion <- function(v_diff, ν_diff, τ_diff, X_diff, γ_diff) {
tryCatch(
expr = {
return((g(dot(X_diff, γ_diff))^2) * exp_decay(v_diff, ν_diff, τ_diff))
},
warning = function(w){
return(0)
}
)
}
# Computing M
calculate_M <- function(lower_lim, upper_lim, covars) {
tryCatch(
expr = {
if (is.na(lower_lim) || is.na(upper_lim)) {
return(0)
} else {
temp_M <- integrate(calculate_drift, lower_lim, upper_lim, ν_drift=nu, τ_drift=tau, X_drift=covars, β_drift=β_c)
return(temp_M$val)
}
},
warning = function(w){
return(0)
}
)
}
# Computing S
calculate_S <- function(lower_lim, upper_lim, covars) {
tryCatch(
expr = {
if (is.na(lower_lim) || is.na(upper_lim)) {
return(0)
} else {
temp_S <- 0.5 * integral(calculate_diffusion, lower_lim, upper_lim, ν_diff=nu, τ_diff=tau, X_diff=covars, γ_diff=γ_c)
return(temp_S)
}
},
warning = function(w){
return(0)
}
)
}
# Computing LL terms
calculate_LL_term <- function(diffusion, round_size, S_round, M_round) {
if (is.na(round_size) || diffusion == 0 || S_round == 0 || M_round == 0) {
return(1)
} else {
temp_LL_term <- ((diffusion * round_size) / (sqrt(16 * pi * S_round^3))) * exp(-((round_size - M_round)^2) / (4 * S_round))
return(temp_LL_term)
}
}
# For non-exits
calculate_LL_pt1 <- function(M_current, size_current, S_current) {
temp_LL_pt1 = pnorm((M_current - size_current) / (sqrt(2 * S_current))) + exp((M_current * size_current) / S_current) * pnorm((-(M_current + size_current)) / (sqrt(2 * S_current)))
return(temp_LL_pt1)
}
######### ----------------------------------------------END HELPER FUNCTIONS ---------------------------------#####################
########################################## Defining the log-likelihood function ##########################################
bm.loglikFun <- function(theta) {
myFunction <- function(x) {
# Drift for each round (returns 0 if NA) ##### TROUBLES START HERE ########
drift_A = calculate_drift(as.numeric(unlist(x["years_to_A"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[3])
drift_B = calculate_drift(as.numeric(unlist(x["years_to_B"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[3])
drift_C = calculate_drift(as.numeric(unlist(x["years_to_C"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[3])
drift_D = calculate_drift(as.numeric(unlist(x["years_to_D"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[3])
drift_E = calculate_drift(as.numeric(unlist(x["years_to_E"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[3])
drift_F = calculate_drift(as.numeric(unlist(x["years_to_F"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[3])
drift_exit = calculate_drift(as.numeric(unlist(x["years_to_exit"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[3])
# Used as part of Equation 4.6
drift_curr = calculate_drift(as.numeric(as.character(x["years_since_founding"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[3])
# Diffusion for each round (returns 0 if NA)
diff_A = calculate_diffusion(as.numeric(unlist(x["years_to_A"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[4])
diff_B = calculate_diffusion(as.numeric(unlist(x["years_to_B"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[4])
diff_C = calculate_diffusion(as.numeric(unlist(x["years_to_C"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[4])
diff_D = calculate_diffusion(as.numeric(unlist(x["years_to_D"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[4])
diff_E = calculate_diffusion(as.numeric(unlist(x["years_to_E"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[4])
diff_F = calculate_diffusion(as.numeric(unlist(x["years_to_F"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[4])
diff_exit = calculate_diffusion(as.numeric(unlist(x["years_to_exit"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[4])
# Used as part of Equation 4.6
diff_curr = calculate_diffusion(as.numeric(as.character(x["years_since_founding"])), theta[1], theta[2], as.numeric(as.character(x[2:3])), theta[4])
# M values
M_A = calculate_M(as.numeric(unlist(x["A_prev_years"])), as.numeric(unlist(x["years_to_A"])), as.numeric(unlist(x[2:3])))
M_B = calculate_M(as.numeric(unlist(x["B_prev_years"])), as.numeric(unlist(x["years_to_B"])), as.numeric(unlist(x[2:3])))
M_C = calculate_M(as.numeric(unlist(x["C_prev_years"])), as.numeric(unlist(x["years_to_C"])), as.numeric(unlist(x[2:3])))
M_D = calculate_M(as.numeric(unlist(x["D_prev_years"])), as.numeric(unlist(x["years_to_D"])), as.numeric(unlist(x[2:3])))
M_E = calculate_M(as.numeric(unlist(x["E_prev_years"])), as.numeric(unlist(x["years_to_E"])), as.numeric(unlist(x[2:3])))
M_F = calculate_M(as.numeric(unlist(x["F_prev_years"])), as.numeric(unlist(x["years_to_F"])), as.numeric(unlist(x[2:3])))
M_exit = calculate_M(as.numeric(unlist(x["Exit_prev_years"])), as.numeric(unlist(x["years_to_exit"])), as.numeric(unlist(x[2:3])))
# Used as part of Equation 4.6
M_curr = calculate_M(as.numeric(unlist(x["years_to_current_round"])), as.numeric(unlist(x["years_since_founding"])), as.numeric(unlist(x[2:3])))
# S values
S_A = calculate_S(as.numeric(unlist(x["A_prev_years"])), as.numeric(unlist(x["years_to_A"])), as.numeric(unlist(x[2:3])))
S_B = calculate_S(as.numeric(unlist(x["B_prev_years"])), as.numeric(unlist(x["years_to_B"])), as.numeric(unlist(x[2:3])))
S_C = calculate_S(as.numeric(unlist(x["C_prev_years"])), as.numeric(unlist(x["years_to_C"])), as.numeric(unlist(x[2:3])))
S_D = calculate_S(as.numeric(unlist(x["D_prev_years"])), as.numeric(unlist(x["years_to_D"])), as.numeric(unlist(x[2:3])))
S_E = calculate_S(as.numeric(unlist(x["E_prev_years"])), as.numeric(unlist(x["years_to_E"])), as.numeric(unlist(x[2:3])))
S_F = calculate_S(as.numeric(unlist(x["F_prev_years"])), as.numeric(unlist(x["years_to_F"])), as.numeric(unlist(x[2:3])))
S_exit = calculate_S(as.numeric(unlist(x["Exit_prev_years"])), as.numeric(unlist(x["years_to_exit"])), as.numeric(unlist(x[2:3])))
# Used as part of Equation 4.6
S_curr = calculate_S(as.numeric(unlist(x["years_to_current_round"])), as.numeric(unlist(x["years_since_founding"])), as.numeric(unlist(x[2:3])))
# LL_terms
LL_A = calculate_LL_term(diff_A, as.numeric(unlist(x["size_A"])), S_A, M_A)
LL_B = calculate_LL_term(diff_B, as.numeric(unlist(x["size_B"])), S_A, M_A)
LL_C = calculate_LL_term(diff_C, as.numeric(unlist(x["size_C"])), S_A, M_A)
LL_D = calculate_LL_term(diff_D, as.numeric(unlist(x["size_D"])), S_A, M_A)
LL_E = calculate_LL_term(diff_E, as.numeric(unlist(x["size_E"])), S_A, M_A)
LL_F = calculate_LL_term(diff_F, as.numeric(unlist(x["size_F"])), S_A, M_A)
LL_exit = calculate_LL_term(diff_exit, as.numeric(unlist(x["size_exit"])), S_A, M_A)
# Equation 4.5 and second term of Equation 4.6
LL_final_exit = LL_A * LL_B * LL_C * LL_D * LL_E * LL_F * LL_exit
# For non-exits
LL_pt1 = calculate_LL_pt1(M_curr, as.numeric(unlist(x["size_current_round"])), S_curr)
# Equation 4.6 (if the company did not exit)
LL_final_no_exit = (1 - LL_pt1) * LL_final_exit
# The function needs to return different LL-values depending on whether the company has exited
ifelse(x["exit"] == 1, LL_final_exit, LL_final_no_exit)
}
my_data$c <- apply(my_data, 1, myFunction)
return(sum(log(my_data$c)))
}
# Calling the optimization algorithm
start = c(3, 3, .1, .1, .1, .1)
names(start) = c("nu", "tau", "beta_leadership_age", "beta_number_of_founders", "gamma_leadership_age", "gamma_number_of_founders")
mle_res <- maxLik( logLik = bm.loglikFun, start = start, method="BFGS" )
print(summary(mle_res))
完整错误如下:
Error in Conj(x) * y : non-conformable arrays
15.
apply(Conj(x) * y, 2, sum)
14.
dot(X_drift, β_drift)
13.
calculate_drift(as.numeric(unlist(x["years_to_A"])), theta[1],
theta[2], as.numeric(unlist(x[2:3])), theta[3])
12.
FUN(newX[, i], ...)
11.
apply(my_data, 1, myFunction)
10.
fnOrig(theta, ...)
9.
logLikFunc(theta, fnOrig = function (theta)
{
calculate_drift <- function(v_drift, ν_drift, τ_drift,
X_drift, β_drift) { ...
8.
eval(f, sys.frame(sys.parent()))
7.
eval(f, sys.frame(sys.parent()))
6.
callWithoutArgs(theta, fName = fName, args = names(formals(sumt)),
...)
5.
(function (theta, fName, ...)
{
return(callWithoutArgs(theta, fName = fName, args = names(formals(sumt)),
...)) ...
4.
do.call(callWithoutSumt, argList)
3.
maxOptim(fn = fn, grad = grad, hess = hess, start = start, method = "BFGS",
fixed = fixed, constraints = constraints, finalHessian = finalHessian,
parscale = parscale, control = mControl, ...)
2.
maxRoutine(fn = logLik, grad = grad, hess = hess, start = start,
constraints = constraints, ...)
1.
maxLik(logLik = bm.loglikFun, start = start, method = "BFGS")
理想情况下,当不使用bm.loglikFun
函数时,就像我能够做到的那样,我可以将参数向量与特征向量相乘。