# Second stage of the simulations:  lasso selection of discrete covariates

# Construct X matrix for the partially linear effect
# n by p design matrix the first q covariates are rho-equicorrelated 
# y depends on only the first r covariates

   require(doMC) # this loads multicore and foreach automatically too.
   registerDoMC(8) # intention to use 8 cores
   require(quantreg)
   require(MASS)
   options(warn = -1) # turn off warnings about tiny diagonals


   fitrqss <- function(X,z,y, lassolambda){
     g <- function(lam,y,X,z,lassolambda) AIC(rqss(y ~ X + qss(z, lambda = lam), 
	       method = "lasso", lambda = lassolambda),k = -1) 
     lamstar <- optimize(g,interval = c(.001,.5), X = X, z = z, 
               y = y, lassolambda = lassolambda)
     f <- rqss(y ~ X + qss(z, lambda = lamstar$min),
               method = "lasso", lambda = lassolambda)
     pvals <- summary(f)$coef[,4]
     B <- plot(f,bands = "both")
     bandg <- B[[1]]
     gtrue <- g0(bandg$x,4)
     lines(bandg$x, gtrue, col = "red")
     ghat <- (bandg$bhi + bandg$blo)/2
     gmise <- mean((ghat - gtrue)^2)
     gmiae <- mean(abs(ghat - gtrue))
     gcove <- 1 - mean((gtrue < bandg$blo[,2]) | (gtrue > bandg$bhi[,2]))
     hcove <- 1 - mean((gtrue < bandg$blo[,1]) | (gtrue > bandg$bhi[,1]))
     c(gmise, gmiae, gcove, hcove, attributes(lamstar$objective)$edf,pvals)
     }
   Design <- function(n,p,q,rho){
	S <- matrix(rho,q,q)
	diag(S) <- 1
	cbind(mvrnorm(n,rep(0,q),S),matrix(rnorm(n*(p-q)),n,p-q))
	}
   LassoLambdaHat <- function(X, R = 10000, tau = 0.5, c = 1, alpha = 0.05){
   # Chernozhukov and Belloni lasso lambda proposal:
   # p 6 of http://arxiv.org/pdf/0904.2931
   # NB:  the sqrt(tau(1-tau)) cancels in their (2.4) (2.6)  when there is only one tau
        n <- nrow(X)
	sigs <- apply(X,2,sd)
	U <- matrix(runif(n * R),n)
	R <- (t(X) %*% (tau - (U < tau)))/sigs
	r <- apply(abs(R),2,max)
	c * quantile(r, 1 - alpha)
	}

#
# Now the setup for the simulation
date()
system("hostname")
sessionInfo()
set.seed(1900)
sigma <- 0.2
rho <- .5
R <- 1000
n <- 400
p <- 24
q <- 12
r <- 6
X <- Design(n,p,q,rho)
lassolamhat <- LassoLambdaHat(X)
z <- sort(runif(n))
g0 <- function(x, j) 
	sqrt(x *(1-x)) * sin(2*pi*(1 + 2^((9-4*j)/5))/(x + 2^((9-4*j)/5)))
g0z <- g0(z,4)
dgp <- function(x,f=rnorm,j=2,sigma=.2,gamma=0)
	f(length(x))*sigma*(1+gamma*x)
rt1 <- function(n) rt(n,1)
rt3 <- function(n) rt(n,3)
rchisq3 <- function(n) rchisq(n,3) - 3
fs <- c(rnorm,rt3,rt1,rchisq3)
b0 <- .1
gammas <- c(0,1)
H <- expand.grid(1:4,1:2)
J <- nrow(H)

AJ <- foreach(j = 1:J) %dopar% { 
 A <- matrix(0,30,R)
 gamma <- gammas[H[j,2]]
 b <- c(rep(b0,r),rep(0,p-r))
 f <- fs[[H[j,1]]]
 for(i in 1:R){
   y <- g0z + X %*% b + dgp(z, f = f, gamma = gamma)
   if(H[j,1] == 4) # recenter for the chisq case to have conditional median 0
      y <- y  - (2.365974 - 3)*sigma*(1+gamma*z)
   A[,i] <- fitrqss(X,z,y,lassolambda = lassolamhat)
   }
 A
}
