# Some infrastructure for simulations of the pairwise ranking paper: Return to sorted abilities
#
# DGP for Bradley-Terry Data Fourth Edition
# Generate Boolean observations on n teams playing m games, then convert to binomial 
# Usage:
#	a	vector of "abilities" (should all be positive)
#	m	total number of matches played 
#	prob	vector of probabilities of play for i teams
#			should be of same length as a, need not sum to 1.
#	type	match selection mechanism:
#			"RR"	Round Robin  
#			"RS"	Random Sampling
#			"LS"	Local Sampling a[j] tends to be near a[i]
#
#' @title FUNCTION_TITLE
#' @description FUNCTION_DESCRIPTION
#' @param a PARAM_DESCRIPTION
#' @param m PARAM_DESCRIPTION
#' @param prob PARAM_DESCRIPTION, Default: NULL
#' @param type PARAM_DESCRIPTION, Default: 'RS'
#' @param K PARAM_DESCRIPTION, Default: 1/5
#' @return OUTPUT_DESCRIPTION
#' @details DETAILS
#' @examples 
#' \dontrun{
#' if(interactive()){
#'  #EXAMPLE1
#'  }
#' }
#' @rdname DGP
#' @export 
DGP <- function(a, m, prob = NULL, type = "RS", K = 1/5){
    n <- length(a)
    if(type == "RR"){
	R <- floor(m/n)
	T <- t(combn(1:n, 2))
	fun <- function(t,a,R) rbinom(1, R, a[t[1]]/(a[t[1]]+a[t[2]]))
	W <- apply(T, 1, fun, a = a, R = R)
	L <- R - W
	s <- (T[,1] != T[,2])
	D <- data.frame(T1 = factor(T[,1], levels = 1:n), 
			T2 = factor(T[,2], levels = 1:n),  W=W, L=L)[s,]
	return(D)
    }
    else if(type == "RS"){
	# NB no prohibition that j == i
	i <- sample(1:n, m, prob = prob, replace = TRUE)
	j <- sample(1:n, m, prob = prob, replace = TRUE)
    }
    else if(type  == "LS"){
	n <- length(a)
	i <- j <-  sample(1:n, m, prob = prob, replace = TRUE)
	k <- 1
	while(k <= m){ # This loop is annoying!
	    jk <- round(ceiling(n * rbeta(1, K * i[k], K * (n+1 - i[k])))) 
	    if(jk != i[k]){
		j[k] <- jk
		k <- k + 1
	    }
	}
    }
    else stop(paste("type", type, "not implemented"))
    i <- factor(i, levels = 1:n)
    j <- factor(j, levels = 1:n)
    p <- a[i]/(a[i]+a[j])
    y <- (runif(m) < p) * 1
    # Now aggregate binary response to binomials
    ij <- interaction(i,j)
    N <- aggregate(y ~ ij, FUN = length)
    W <- aggregate(y ~ ij, FUN = sum)[,2]
    L <- N[,2] - W
    ij <- strsplit(as.vector(N[,1]), "\\.")
    H <- t(matrix(as.numeric(unlist(ij)),2))
    s <- (H[,1] != H[,2])
    data.frame(T1 = factor(H[,1], levels = 1:n), 
	       T2 = factor(H[,2], levels = 1:n),  W=W, L=L)
}
# Bradley-Terry fitting with possible group lasso penalty or KW regularization
#' @title FUNCTION_TITLE
#' @description FUNCTION_DESCRIPTION
#' @param D PARAM_DESCRIPTION
#' @param method PARAM_DESCRIPTION, Default: 'MLE'
#' @param lambda PARAM_DESCRIPTION, Default: NULL
#' @param refit PARAM_DESCRIPTION, Default: 4
#' @param bwk PARAM_DESCRIPTION, Default: 0
#' @param tol PARAM_DESCRIPTION, Default: 1e-06
#' @param ... PARAM_DESCRIPTION
#' @return OUTPUT_DESCRIPTION
#' @details DETAILS
#' @examples 
#' \dontrun{
#' if(interactive()){
#'  #EXAMPLE1
#'  }
#' }
#' @rdname BTfit
#' @export 
BTfit <- function(D, method = "MLE", lambda = NULL, refit = 4, bwk = 0, tol = 1e-6, ...){
    X <- (model.matrix(~ T1 - 1, data = D) - model.matrix(~ T2 - 1, data = D))[,-1]
    Y <- with(D,cbind(W, L))
    if(method == "lasso"){
	if(!length(lambda)) stop("Must specify a lambda for lasso method")
	teams <- factor(levels(D$T2)[-1])
	pairs <- t(combn(teams,2))
	P <- model.matrix(~ pairs[,1] - 1) - model.matrix(~ pairs[,2] - 1)
	P <- rbind(diag(length(teams)),P)
	f <- RLR(X, Y, P, lambda = lambda, ...)
	bhat <- c(0, f$coef)
	if(refit > 0){
	    r <- round(f$coef, round(refit))
	    z <- factor(r, levels = unique(r))
	    Z <- model.matrix(~ z - 1)
	    f <- glm.fit(X %*% Z, Y, family = binomial("logit"))
	    bhat <- c(0, Z %*% f$coef)
	}
    }
    else if(method == "KWPM"){
	f <- glm(Y ~ X - 1, family = binomial("logit"))
	sd <- sqrt(diag(vcov(f)))
	G <- GLmix(f$coef, sigma = sd)
	bhat <- c(0,G$dy)
	if(bwk > 0) {
	    H <- KWsmooth(G, bw = bwKW(G,bwk))
	    bhat <- c(0,predict(H,f$coef,newsigma = sd))
	}
    }
    else if(method == "KWPR"){
	f <- glm(Y ~ X - 1, family = binomial("logit"))
	COV <- vcov(f)
	bhat = c(0,f$coef)
	g <- GLmix(bhat[-1], sigma = sqrt(diag(COV)))
	gs <- KWsmooth(g, bw = bwKW(g, bwk))
	n <- length(bhat)
	teams = t(combn(n, 2))
	R <- data.frame(i = teams[,1], j = teams[,2], dpr = NA)
	A <- dnorm(outer(bhat[-1], g$x, "-"), sd = sqrt(diag(COV)))
	gs <- list(x = gs$x, y = gs$y/sum(gs$y))	
	R$dpr[R$i == 1] = A %*% ((gs$x >= 0) * gs$y)/(A %*% gs$y)
	gr = list(x = gs$x[gs$y > tol], y = gs$y[gs$y > tol]/sum(gs$y[gs$y > tol]))
	gridr <- expand.grid(u1 = gr$x, u2 = gr$x)
	fgridr <- expand.grid(f1 = gr$y, f2 = gr$y)
	ffr <- apply(fgridr,1,function(x) x[1]*x[2])
	t <- t(teams)
	b <- bhat
	Gx <- rbind(gridr$u1, gridr$u2)
	Gy <- rbind(fgridr$f1, fgridr$f2)
	p <- n
	m <- ncol(Gx)
	p2 <- p*(p-1)/2
	z <- .Fortran("fdpr",
	    as.integer(p), as.integer(m), as.integer(p2),
	    as.double(b), as.integer(t), as.double(COV),
	    as.double(Gx), as.double(Gy), r = double(p2))
	r <- c(R$dpr[1:(p-1)], z$r[p:p2])
	Rk <- rep(NA, n)
	for (k in 1:n)
	    Rk[k] = sum(1 - r[R$j == k&R$i < R$j]) + sum(r[R$i == k])
	bhat <- -Rk
    }
    else if(method == "MLE"){
	f <- glm(Y ~ X - 1, family = binomial("logit"))
	bhat <- c(0,f$coef)
    }
    else if(method == "Borda")
	bhat <- Borda(D, weighted = FALSE)
    else if(method == "WBorda")
	bhat <- Borda(D, weighted = TRUE)
    else stop(paste(method, "not implemented"))
    class(bhat) <- "BTfit"
    bhat
}
#' @title FUNCTION_TITLE
#' @description FUNCTION_DESCRIPTION
#' @param D PARAM_DESCRIPTION
#' @param weighted PARAM_DESCRIPTION, Default: TRUE
#' @return OUTPUT_DESCRIPTION
#' @details DETAILS
#' @examples 
#' \dontrun{
#' if(interactive()){
#'  #EXAMPLE1
#'  }
#' }
#' @rdname Borda
#' @export 
Borda <- function(D, weighted = TRUE){
    ci1 <- aggregate(W ~ T1, data = D, FUN = sum, drop = FALSE)[,2]
    ci2 <- aggregate(L ~ T2, data = D, FUN = sum, drop = FALSE)[,2]
    ci1[is.na(ci1)] = 0
    ci2[is.na(ci2)] = 0
    ci  <- ci1 + ci2
    if(weighted){
	D$T <- D$W + D$L
	ni1 <- aggregate(T ~ T1, data = D, FUN = sum, drop = FALSE)[,2] 
	ni2 <- aggregate(T ~ T2, data = D, FUN = sum, drop = FALSE)[,2]
	ni1[is.na(ni1)] = 0
	ni2[is.na(ni2)] = 0
	ci <- ci/(ni1 + ni2)
    }
    ci
}
