#' Control parameters for NPMLE of bivariate random coefficient binary response #' #' These parameters can be passed via the code{…} argument of the code{rcbr} function. #' The first three arguments are only relevant if full cell enumeration is employed for #' bivariate version of the NPMLE. #' #' @param uv matrix of evaluation points for potential mass points #' @param u grid of evaluation points for potential mass points #' @param v grid of evaluation points for potential mass points #' @param initial initial point for cell enumeration algorithm #' @param epsbound controls how close witness points can be to vertices of a cell #' @param epstol zero tolerance for witness solutions #' @param presolve controls whether Mosek does a presolve of the LP #' @param verb controls verbosity of Mosek solver 0 implies it is quiet #' #' @return updated list #' @export KW.control <- function(uv = NULL, u = NULL, v = NULL, initial = c(0,0),
epsbound = 1, epstol = 1e-07, presolve = 1, verb = 0) list(uv = uv, u = u, v = v, initial = initial, epsbound = epsbound, epstol = epstol, presolve = presolve, verb = verb)
#' #' NPMLE fitting for random coefficient binary response model #' #' Exact NPMLE fitting requires that the code{uv} argument contain a matrix #' whose rows represent points in the interior of the locally maximal polytopes #' determined by the hyperplane arrangement of the observations. If it is not #' provided it will be computed afresh here; since this can be somewhat time #' consuming, code{uv} is included in the returned object so that it can be #' reused if desired. Approximate NPMLE fitting can be achieved by specifying #' an equally spaced grid of points at which the NPMLE can assign mass using #' the arguments code{u} and code{v}. If the design matrix code{X} contains #' only 2 columns, so we have the Cosslett, aka current status, model then the #' polygons in the prior description collapse to intervals and the default method #' computes the locally maximal count intervals and passes their interior points #' to the optimizer of the log likelihood. Alternatively, as in the bivariate #' case one can specify a grid to obtain an approximate solution. #' #' @param X the design matrix expected to have an intercept column of #' ones as the first column, the last column is presumed to contain values of #' the covariate that is designated to have coefficient one. #' @param y the binary response. #' @param u grid values for the intercept dimension of the random coefficients #' @param v grid values for the intercept dimension of the random coefficients #' @param uv evaluation points for the mass to be assigned by the NPMLE,typically #' the witness points of the locally maximal polytopes,as described in Gu and Koenker (2018). #' @param control is a list of parameters for the fitting, see #' code{KW.control} for further details. #' @return a list with components: #' itemize{ #' item uv evaluation points for the fitted distribution #' item W estimated mass associated with the code{uv} points #' item logLik the loglikelihood value of the fit #' item status mosek solution status #' } #' @author Jiaying Gu and Roger Koenker #' @references #' Gu, J. and R. Koenker (2018) Nonparametric maximum likelihood estimation #' of the random coefficients binary choice model, preprint. #' @keywords nonparametrics
#' importFrom Matrix Rmosek #' @export rcbr.fit.KW1 <- function (X, y, control) {
v <- control$v n <- length(X) if(!length(v)){ X <- X[,2] u <- sort(X) c <- rep(0,n+1) for(k in 1:n) c[k] = sum(X[y==0]< u[k]) + sum(X[y==1]>= u[k]) c[n+1] = sum(X[y==0]<= u[n]) s <- sign(diff(c)) lmax <- which(s[-n] - s[-1] == 2) if(s[1] == -1) lmax = c(1,lmax) if(s[n] == 1 ) lmax = c(lmax,n) v <- u[lmax + 1] } else if (length(v) == 1) v <- seq(min(X) - 1e-4, max(X) + 1e-4, length = v) w <- rep(1, n)/n d <- rep(1, length(v)) A <- outer(X, v, ">=") A <- (y == 1) * A + (y == 0) * (1 - A) f <- KWDual(A, d, w, ...) logL <- sum(log(A %*% f$f)) z <- list(x = v, y = f$f, logL = logL, status = f$status) class(z) <- c("KW1", "density") return(z)
} rcbr.fit.KW2 <- function (x, y, uv = NULL, u = NULL, v = NULL, control){
uv <- control$uv u <- control$u v <- control$v initial <- control$initial epsbound <- control$epsbound epstol <- control$epstol presolve <- control$presolve verb <- control$verb if(length(u) * length(v)){ du <- diff(u)[1] dv <- diff(v)[1] if(!all(abs(diff(u) - du) < epstol)) stop("u-grid must be equally spaced") if(!all(abs(diff(v) - dv) < epstol)) stop("v-grid must be equally spaced") uv <- expand.grid(u, v) } else if(!length(uv)){ # Find locally maximal polygons cells <- NICER(x[,1:2], -x[,3], initial, verb, epsbound, epstol) sv <- cells$SignVector sv[which(y==0),] <- -sv[which(y==0),] uv <- t(cells$w[,neighbours(sv)]) } n <- NROW(x) p <- NCOL(x) w <- rep(1, n)/n B <- as.matrix(cbind(uv, 1)) d <- rep(1, length(nrow(uv))) A <- x %*% t(B) A <- 1 * (A >= 0) A <- (y == 1) * A + (y == 0) * (1 - A) f <- KWDual(A, d, w, verb = verb) # FIXME: pass other control parameters!!! W <- f$f x1 <- x[y==1,] x0 <- x[y==0,] pos <- apply(x1 %*% t(B), 1, function(a) sum(W[a > 0])) neg <- 1 - apply(x0 %*% t(B), 1, function(a) sum(W[a > 0])) logLik <- sum(log(c(pos, neg))) z <- list(uv = uv, W = W, logLik = logLik, status = f$status) class(z) <- c("KW2", "density") return(z)
} logLik.KW2 <- function(z) z$logLik predict.KW2 <- function(z, X, smooth = 0){
W <- z$W if(length(z$uv)) uv <- z$uv else uv <- expand.grid(z$u,z$v) B <- as.matrix(cbind(uv, 1)) Bsd <- sqrt(apply(B^2,1,sum)) B <- B/Bsd if(smooth > 0){ # This may be speeded up with FFT? mu <- length(z$u) mv <- length(z$v) fs <- matrix(0,mu,mv) for(i in 1:mu){ for(j in 1:mv){ Z <- cbind(z$u[i]-uv[,1],z$v[j]-uv[,2]) fs[i,j] <- sum(dmvnorm(Z,sigma = diag(2) * smooth) * W) } } W <- c(fs) W <- W/sum(W) } A <- X %*% t(B) apply(A, 1, function(a) sum(W[a > 0]))
}
#' Dual optimization for Kiefer-Wolfowitz problems #' #' Optimization is carried out by Mosek and uses interior point methods. #' It relies on the pkg{Rmosek} interface to R see installation instructions at #' url{docs.mosek.com/8.1/rmosek/install-interface.html}.
#' #' @param A Linear constraint matrix #' @param d constraint vector #' @param w weights for code{x} should sum to one. #' @param … other parameters passed to control optimization: These may #' include code{rtol} the relative tolerance for dual gap convergence criterion, #' code{verb} to control verbosity desired from mosek, code{verb = 0} is quiet, #' code{verb = 5} produces a fairly detailed iteration log, #' code{method} controls the choice of optimizer: by default this is “mosek” #' which employs interior point methods, #' @return Returns a list with components: #' describe{ #' item{f}{dual solution vector, the mixing density, #' item{g}{primal solution vector, the mixture density #' evaluated at the data points} #' item{logLik}{log likelihood} #' item{status}{return status from Mosek} #' } #' @author R. Koenker #' @references #' Koenker, R and I. Mizera, (2013) “Convex Optimization, Shape Constraints, #' Compound Decisions, and Empirical Bayes Rules,'' emph{JASA}, 109, 674–685. #' #' Mosek Aps (2015) Users Guide to the R-to-Mosek Optimization Interface, #' url{docs.mosek.com/8.1/rmosek/index.html}.
#' #' Koenker, R. and J. Gu, (2017) REBayes: An {R} Package for Empirical Bayes Mixture Methods, #' emph{Journal of Statistical Software}, 82, 1–26. #' @keywords nonparametrics #' @export KWDual <- function(A, d, w, …){ # Dual Kiefer-Wolfowitz MLE for Mixture Problems # # This version implements a class of density estimators solving: # # min_x {F(x) := sum -log (x_i)} s.t. A' x <= d, 0 <= x,
# # # where e.g. A = phi(outer(Y,g,“fun”)), with Y data and g a grid on the support of Y, # and “fun” is some function representing the dependence of the base distribution. # #————————————————————————————- # # Roger Koenker # # First version:24 Feb 2012
# Revised: 10 Jun 2015 # Simplified signature # Revised: 2 Jul 2015 # Added pogs method
KWpogs <- function (A, d, w, control) { # POGS implementation of KWDual
n <- nrow(A) m <- ncol(A) # Uncomment the next two lines if you want to use pogs #f <- list(h = pogs::kIndBox01(n), c = d) #g <- list(h = pogs::kNegLog(m), c = w) pogs.control <- function(rel_tol=1e-4, abs_tol=1e-4, rho=1.0, max_iter=1000, verbose = 1, adaptive_rho=TRUE) list(rel_tol=rel_tol, abs_tol=abs_tol, rho=rho, max_iter=max_iter, verbose=verbose, adaptive_rho=adaptive_rho) params <- pogs.control() if(length(control)){ control <- as.list(control) params[names(control)] <- control } # Uncomment the next line if you want to use pogs #z <- pogs::pogs(A, f, g, params) # This abuse of notation is needed to conform to KWDual f <- z$v/d g <- as.vector(t(A) %*% (f * d)) list(f = f, g = g, status = z$status)
}
n <- nrow(A) m <- ncol(A) A <- t(A)
dots <- list(…)
if(length(dots$method))
if(dots$method == "pogs") { dots$method <- NULL return(KWpogs(A, d, w, control = dots)) } else if(!dots$method == "mosek") stop(paste("No applicable KWDual method: ", dots$method))
# Default mosek method rtol <- ifelse(length(dots$rtol), dots$rtol, 1e-6) verb <- ifelse(length(dots$verb), dots$verb, 0) if(length(dots$control)) control <- dots$control else control <- NULL
C <- rep(0,n) P <- list(sense = “min”) P$c <- C P$A <- Matrix::Matrix(A, sparse = TRUE) P$bc <- rbind(rep(0,m),d) P$bx <- rbind(rep(0,n),rep(Inf,n)) opro <- matrix ( list (), nrow =5, ncol = n) rownames ( opro ) <- c(“ type ”,“j”,“f”,“g”,“h”)
opro <- as.list(rep('log',n)) opro <- as.list(1:n) opro <- as.list(-w) opro <- as.list(rep(1,n)) opro <- as.list(rep(0,n)) P$scopt<- list(opro = opro) P$dparam$intpnt_nl_tol_rel_gap <- rtol if(length(control)){
P$iparam <- control$iparam P$dparam <- control$dparam P$sparam <- control$sparam
} z <- Rmosek::mosek(P, opts = list(verbose = verb)) if(z$response$code != 0)
stop(paste("Mosek error: ", z$response$msg))
status <- z$sol$itr$solsta if (status != “OPTIMAL”)
warning(paste("Solution status = ", status))
f <- z$sol$itr$suc if(min(f) < 0) warning(“estimated mixing distribution has some negative values:
consider reducing rtol")
g <- as.vector(t(A) %*% (f * d)) list(f = f, g = g, status = status) }