R

时间:2015-10-12 13:48:08

标签: r machine-learning simulation maximize

我正在尝试'使用R中的模拟最大似然随机效应编制有序概率模型。

我已经改编了Chris Adolph的代码(http://faculty.washington.edu/cadolph/?page=21

set.seed(10234)
nobs <- 1000

x1 <- rnorm(nobs)*.15^.5
x2 <- rnorm(nobs)*.35^.5
z <- rnorm(nobs)*.25^.5
y <- round(runif(nobs, 1,5), 0)
x <- cbind(x1, x2)

#### Generate Halton Sequences
library("randtoolbox")
R <- 200
#a <- matrix(999, nrow=R, ncol=nobs)
a <- halton(n=nobs, dim=R, normal=T, init=T)

# Likelihood for 5 category ordered probit
llk.oprobit5 <- function(param, x, y) {
    # preliminaries
    x <- as.matrix(x)
    os <- rep(1, nrow(x))
    x <- cbind(os, x)
    b <- param[1:ncol(x)]
    t2 <- param[(ncol(x)+1)]
    t3 <- param[(ncol(x)+2)]
    t4 <- param[(ncol(x)+3)]
    sigma_a <- param[ncol(x)+4]

    # probabilities and penalty function
    xb <- x %*% b %*% rep(1, R)
    asigma <- a * sigma_a

    p1 <- pnorm(- xb - asigma)
    if (t2 <= 0) {
        p2 <- -(abs(t2) * 10000)    # penalty function to keep t2>0
    } else {
        p2 <- pnorm(t2 - xb - asigma) - pnorm(- xb - asigma)
    }
    if (t3 <= t2) {
        p3 <- -((t2-t3)*10000)    # penalty to keep t3>t2
    } else {
        p3 <- pnorm(t3 - xb - asigma) - pnorm(t2 - xb - asigma)
    }
    if (t4 <= t3) {
        p4 <- -((t3 - t4) * 10000)
    } else {
        p4 <- pnorm(t4 - xb - asigma) - pnorm(t3 - xb - asigma) 
    }
    p5 <- 1 - pnorm(t4 - xb - asigma)

    p1 <- log(apply(p1, MARGIN=1, FUN=sum)/R)
    p2 <- log(apply(p2, MARGIN=1, FUN=sum)/R)
    p3 <- log(apply(p3, MARGIN=1, FUN=sum)/R)
    p4 <- log(apply(p4, MARGIN=1, FUN=sum)/R)
    p5 <- log(apply(p5, MARGIN=1, FUN=sum)/R)

    # -1 * log likelihood (optim is a minimizer)
    -sum(cbind(y==1, y==2, y==3, y==4, y==5) * cbind(p1, p2, p3, p4, p5))
}

# Use optim directly
ls.result <- lm(y~x)                    # use ls estimates as starting   values
stval <- c(ls.result$coefficients,1,2,3,2)  # initial guesses
oprobit.result <- optim(stval, llk.oprobit5, method="BFGS", x=x, y=y, hessian = T)

但是,代码给了我以下错误:     应用错误(p3,MARGIN = 1,FUN =总和):         昏暗(X)必须具有正长度     来自:apply(p3,MARGIN = 1,FUN = sum)

我已经使用了debug()函数,我可以单独运行所有函数,并且可以在每一步中打印值。

1 个答案:

答案 0 :(得分:0)

问题是,只有当相应的参数值在允许的范围内时,才需要对正在执行的Halton序列进行平均。请注意,我在每个friend分支中移动了行log(apply(…))

if

然后成功运行:

set.seed(10234)
nobs <- 1000

x1 <- rnorm(nobs)*.15^.5
x2 <- rnorm(nobs)*.35^.5
z <- rnorm(nobs)*.25^.5
y <- round(runif(nobs, 1,5), 0)
x <- cbind(x1, x2)

#### Generate Halton Sequences
library("randtoolbox")
R <- 200
a <- halton(n=nobs, dim=R, normal=T, init=T)

# Likelihood for 5 category ordered probit
llk.oprobit5 <- function(param, x, y) {
    # preliminaries
    x <- as.matrix(x)
    os <- rep(1, nrow(x))
    x <- cbind(os, x)
    b <- param[1:ncol(x)]
    t2 <- param[(ncol(x)+1)]
    t3 <- param[(ncol(x)+2)]
    t4 <- param[(ncol(x)+3)]
    sigma_a <- param[ncol(x)+4]

    # probabilities and penalty function
    xb <- x %*% b %*% rep(1, R)
    asigma <- a*sigma_a

    p1 <- pnorm(-xb-asigma)
    p1 <- log(apply(p1, MARGIN=1, FUN=sum)/R)

    if (t2 <= 0) {
        p2 <- -(abs(t2) * 10000)    # penalty function to keep t2>0
    } else {
        p2 <- pnorm(t2-xb-asigma)-pnorm(-xb-asigma)
        p2 <- log(apply(p2, MARGIN=1, FUN=sum)/R)
    }
    if (t3 <= t2) {
        p3 <- -((t2-t3)*10000)    # penalty to keep t3>t2
    } else {
        p3 <- pnorm(t3-xb-asigma)-pnorm(t2-xb-asigma)   
        p3 <- log(apply(p3, MARGIN=1, FUN=sum)/R)
    }
    if (t4 <= t3) {
        p4 <- -((t3-t4)*10000)
    } else {
        p4 <- pnorm(t4-xb-asigma)-pnorm(t3-xb-asigma) 
        p4 <- log(apply(p4, MARGIN=1, FUN=sum)/R)
    }
    p5 <- 1 - pnorm(t4-xb-asigma)
    p5 <- log(apply(p5, MARGIN=1, FUN=sum)/R)

    # -1 * log likelihood (optim is a minimizer)
    -sum(cbind(y==1,y==2,y==3,y==4, y==5) * cbind(p1,p2,p3,p4,p5))
}

# Use optim directly
ls.result <- lm(y~x)                    # use ls estimates as starting   values
stval <- c(ls.result$coefficients,1,2,3,2)  # initial guesses

oprobit.result <- optim(stval, llk.oprobit5, method="BFGS", x=x, y=y, hessian=T, control = list(trace = 10, REPORT = 1))

产生结果:

...
iter  20 value 1567.966484
iter  21 value 1567.966434
iter  22 value 1567.966389
iter  23 value 1567.966350
iter  23 value 1567.966349
iter  23 value 1567.966345
final  value 1567.966345 
converged