#' Maximum Entropy [De]Regularized [Bivariate] Density Estimation
#' 
#' Bivariate density estimation based on maximum entropy methods
#' 
#' See the references for further details. And also Mosek "Manuals". The
#' acronym, according to the urban dictionary has a nice connection to
#' a term used in Bahamian dialect, mostly on the Family Islands like Eleuthera
#' and Cat Island meaning "mess with" "get involved," "get entangled," "fool
#' around," "bother:"
#' "I don't like to medder up with all kinda people"
#' "Don't medder with people (chirren)"
#' "Why you think she medderin up in their business."
#' 
#' This version implements a class of shape constrained bivariate density estimators. 
#' The form of the shape constraint is controlled by the parameter \eqn{\alpha}
#' Setting \eqn{\alpha = 1}, 
#' constrains the fitted density to be log-concave; for \eqn{\alpha = 0.5},  \eqn{-1/\sqrt f}
#' is constrained to be concave; and for \eqn{\alpha \le 0}, \eqn{1/f^{\alpha -1}} is
#' constrained to be concave.  In these cases no further regularization of the smoothness
#' of density is required as the concavity constraint acts as  regularizer.
#' As explained further in Koenker and Mizera (2010) and
#' Han and Wellner (2016) decreasing \eqn{\alpha} constrains the fitted density to lie 
#' in larger classes of quasi-concave
#' densities.  See \code{demo(Gosset)} for an illustration of these options, but be aware
#' that more extreme \eqn{\alpha} pose more challenges from an numerical optimization
#' perspective.  Fitting for \eqn{\alpha < 1} employs a fidelity criterion closely 
#' related to Renyi entropy that is more natural than likelihood for very peaked, or very heavy
#' tailed target densities.  No provision is made for bivariate norm constraints (yet).
#'

#' @param x Data: n by 2 matrix of observations
#' @param m Undata: N by 2 matrix of points at which the estimate is to be
#' evaluated, by default equal to x.
#' @param wt weights associated with x points, must be positive.
#' @param w  integration weights: coefficients w_i that give sum f_i w_i = 1
#' @param mass  norming constant to obtain a density
#' @param alpha Renyi entropy parameter characterizing fidelity criterion
#' by default 1 is log-concave and 0.5 is Hellinger, and so forth
#' @param rtol Convergence tolerance for Mosek algorithm,
#' @param verb Parameter controlling verbosity of solution, 0 for silent, 5
#' gives rather detailed iteration log.
#' @param control Mosek control list 
#' @return An object of class "medde2" with components 
#' \item{m}{points of evaluation on the domain of the density} 
#' \item{f}{function values of the estimated density at the evaluation points m} 
#' \item{g}{estimation function values of the transformed density at points m}   
#' \item{x}{original data points}
#' \item{w}{integration weights}
#' \item{alpha}{Renyi exponent}
#' \item{status}{exit status from Mosek}
#' @author Roger Koenker and Ivan Mizera
#' @seealso A plotting method is available, see \code{plot.medde2}
#' @references  
#' Han, Qiyang and Jon Wellner (2016) ``Approximation and estimation of s-concave 
#' densities via Renyi divergences, \emph{Annals of Statistics}, 44, 1332-1359.
#' Koenker, R and I. Mizera, (2010) ``Quasi-Concave Density Estimation''
#' \emph{Annals of Statistics}, 38, 2998-3027.
#' @keywords nonparametric
#' @export
#' @import Matrix
#' 

medde2 <- function(x, m = x, wt = rep(1,length(x)), w, mass=1, alpha=1,
                   rtol = 1e-6, verb = 0, control = NULL)
{
  x <- as.matrix(x)
  m <- as.matrix(m)
  m. <- nrow(m)
  d. <- ncol(x)
  
  if (ncol(m) != d.) 
    stop("dimensions of the data and prediction points do not match")
  
  beta <- alpha/(alpha-1)

  wt <- wt/sum(wt)
  E <- apply(as.vector(wt)*neinn(x,m),2,sum)

### Mosek won't take nonlinear functions with coefficient zero
  w <- w*mass 
  wp <- w > 0
  wpos <- w[wp]
  w. <- length(wpos)

### may need to be changed if constraint localization attempted
  m.. <- m.*(m.-1)
  D <- spMatrix(m..,m.)
  ii <- 0
  for (k in 1:m.)
    for (j in 1:m.)
      if (j != k) {
        ii <- ii+1
        D[ii,j] <- 1
    }
  D <- D - spMatrix(m.., m., i=1:m.., j=rep(1:m.,rep(m.-1,m.)), x=rep(1,m..))

### note that the code is general, for any dimension 1,2,3,... 
  DD <- spMatrix(m..,0)
  for (k in 1:d.)
    DD <- cBind(DD, spMatrix(m.., m., i=1:m.., j=rep(1:m.,rep(m.-1,m.)),
                             x = t(m[,k,drop=FALSE]) %*% t(D))) 
  
  opro <- matrix(list(), nrow=5, ncol=w.)
  opro[5,] <- rep(0,w.)
  if (alpha == 1) {
    opro[1,] <- "EXP"
    opro[2,] <- (1:m.)[wp]
    opro[3,] <- wpos
    opro[4,] <- rep(-1,w.)
  } else if (alpha == 0) {
    opro[1,] <- "LOG"
    opro[2,] <- (1:m.)[wp]
    opro[3,] <- -wpos
    opro[4,] <- rep(1,w.)
  } else {
    opro[1,] <- "POW"
    opro[4,] <- rep(beta,w.)
    if (alpha > 1) { 
      opro[2,] <- ((m.+1):(m.+m.))[wp]
      opro[3,] <- wpos*rep(1/beta,w.)
    } else { 
      opro[2,] <- (1:m.)[wp]
      opro[3,] <- wpos*rep(-1/beta,w.)
    }
  }

  pr <- list()
  pr$sense <- "min"
  if (alpha == 1) {
    pr$c <- c(E,rep(0,d.*m.))
    pr$A <- cBind(D,-DD)
    pr$bc <- rbind(rep(0,m..),rep(Inf,m..))   
    pr$bx <- rbind(rep(-Inf,(d.+1)*m.),rep(Inf,(d.+1)*m.))
  } else if (alpha > 1) {
    pr$c <- c(-E,rep(0,(d.+1)*m.))
    pr$A <- rBind(cBind(-D,  spMatrix(m..,m.),DD),
                  cBind(-Diagonal(m.),Diagonal(m.),spMatrix(m.,d.*m.)))
    pr$bc <- rbind(rep(0,m..+m.),rep(Inf,m..+m.))
    pr$bx <- rbind(c(rep(-Inf,m.),rep(0,m.),rep(-Inf,d.*m.)),
                   rep(Inf,(d.+2)*m.))
  } else {
    pr$c <- c(E,rep(0,d.*m.))
    pr$A <- cBind(D,-DD)
    pr$bc <- rbind(rep(0,m..),rep(Inf,m..))   
    pr$bx <- rbind(c(rep(0,m.),rep(-Inf,d.*m.)),rep(Inf,(d.+1)*m.))
  }

  pr$scopt <- list(opro=opro)
  pr$dparam$intpnt_nl_tol_rel_gap <- rtol
  if (length(control)) {
        pr$iparam <- control$iparam
        pr$dparam <- control$dparam
        pr$sparam <- control$sparam
    }
  z <- Rmosek::mosek(pr, opts = list(verbose = verb))
  status <- z$sol$itr$solsta
  g <- z$sol$itr$xx[1:m.]
  if (status != "OPTIMAL") 
        warning(paste("Solution status = ", status))
  if (alpha == 1)
    f <- exp(-g)
  else if (alpha > 1)
    f <- pmax(0,g)^(beta-1)
  else
    f <- abs(g)^(beta-1)
  
  list(m = m, f = f, g = g, x = x, w = w,
                 alpha = alpha, status = status)
}

#' Integration weights for Bivariate Renyi fitting
#' @param m points of evaluation of the estimated density
#' @param x data points
#' @author Ivan Mizera
#' @keywords nonparametric
#' @export
#' 
neinn <- function(m, x) {
    x <- as.matrix(x)
    m <- as.matrix(m)
    m. <- nrow(m)
    x. <- nrow(x)
    dis <- rep(0,m.)
    neinn <- Matrix(0,m.,x.)
    for (k in 1:m.) {
      dis <- apply(sweep(x,2,m[k,])^2,1,sum)
      ind <- which(dis==min(dis))
      neinn[k,ind] <- 1
    }
    sweep(neinn,1,apply(neinn,1,sum),"/")
  }
