分采样方法进行分位数回归的置信区间

时间:2018-08-15 16:41:15

标签: r confidence-interval economics subsampling quantile-regression

我现在正尝试像paper中的p.204一样执行子采样方法。但是在运行代码后,我发现 L_nb 值太低。而且我不知道代码中的问题在哪里。应该在40到50的块大小下工作。

以下是一些结果:

tau=0.50
1000 round / n=200 / b = 30 / mean(L_nb) = 0.09087719
1000 round / n=200 / b = 45 / mean(L_nb) = 0.1569231  (mean 0.2186538) tau=0.750
1000 round / n=200 / b = 60 / mean(L_nb) = 0.2421277  (mean 0.3753191) tau=0.750
1000 round / n=200 / b = 70 / mean(L_nb) = 0.3280916  (mean 0.4670229)  tau=0.750
1000 round / n=200 / b = 80 / mean(L_nb) = 0.4119008  (mean 0.57652893) tau=0.750

基本上,结果应接近第216页表2 中的图表。即使块大小为10到40,它们的结果也是如此。因此,我的代码中肯定有一些错误。

这是代码:

library(quantreg)

n <- 200 
b <- 45 #block size
tau = 0.50
set.seed(6789)
x <- matrix(rnorm(n,0,3),n,1)
q <- matrix(rnorm(n,0,1),n,1)
e <- matrix(rnorm(n,0,3),n,1)
w <- data.frame(cbind(x,q,e))
names(w)<-c("x","q","e")

yt <- ifelse(x<=quantile(x,0.65),0.5,1.2)
#plot(yt)
c <- as.numeric(quantile(e,tau))
y <- 1-c+yt*x+e
w_all <- data.frame(cbind(y,w))
names(w_all)<-c("y","x","q","e")

x_90th_q <- as.numeric(qnorm(0.9))  # x (90th quantile)

# start calculation
tau = 0.50
n = nrow(x)
k = 0.05
e = rq.fit.br(cbind(1,w_all$x),w_all$y, tau=tau ,interp=FALSE)$residuals
s0 <- sum(e*(tau-(e<0)))  
n1 <- round(.1*n)+k
n2 <- round(.9*n)-k
qs <- sort(q)
qs <- qs[n1:n2]
qs <- as.matrix(unique(qs))
qn <- nrow(qs)
sn <- matrix(0,qn,1)
for (r in 1:qn){
  d <- (q <= qs[r])
  xx <- (x) * (d %*% matrix(1,1,20*k))
  xx <- xx - x %*% rq(xx~x,tau,method="fn")$coefficients[2]-rq(xx~x,tau,method="fn")$coefficients[1]
  exw <- rq(e ~ xx, tau = tau)$rho
  sn[r] <- exw  
}
r <- which.min(sn)
smin <- sn[r]
qhat <- qs[r]

L_nb <- vector()
for(i in 1:1000){
  print(i)
  #set.seed(i)

  n = 200
  select_idx <- sample(1:n,b,replace=T) # subsampling
  w_b <- cbind(w_all[select_idx,])

  # start calculation
  tau = 0.50
  n = nrow(w_b)
  k = 0.05
  e_qr = rq.fit.br(cbind(1,w_b$x),w_b$y, tau=tau ,interp=FALSE)$residuals
  s0 <- sum(e_qr*(tau-(e_qr<0)))  
  n1 <- round(.1*n)+k
  n2 <- round(.9*n)-k
  qs <- sort(w_b$q)
  qs <- qs[n1:n2]
  qs <- as.matrix(unique(qs))
  qn <- nrow(qs)
  sn <- matrix(0,qn,1)
  for (r in 1:qn){
    d <- (w_b$q <= qs[r])
    xbxb <- (w_b$x)*(d %*% matrix(1,1,20*k))
    xbxb <- xbxb - w_b$x %*% t(rq(xbxb~w_b$x,tau,method="fn")$coefficients[2])-rq(xbxb~w_b$x,tau,method="fn")$coefficients[1]
    exbw <- rq(e_qr ~ xbxb, tau = tau)$rho
    sn[r] <- exbw  
  }
  r <- which.min(sn)
  smin <- sn[r]
  qb_hat <- qs[r]

  n=200
  L_nb[i] <-mean(sum(ifelse(abs(qb_hat-qhat)<=x_90th_q,1,0)))
  print(c(mean(1/(n-b+1)*b^0.9*L_nb),sum(L_nb)/1000))
  #print(c((L_nb),abs(qb_hat-qhat)))
}

希望有人可以帮助我解决问题。非常感谢!

0 个答案:

没有答案