# Horowitz (1993) data: transportation mode to work [car versus other transit]
# downloaded from https://www.gams.com/latest/gamslib_ml/libhtml/gamslib_mws.html
# (see also http://qed.econ.queensu.ca/jae/2012-v27.7/benoit-van_den_poel/readme.bv.txt)
# Binary Response Single Index model 
# Use the simple score estimator from Groeneboom and Henderickx (2018, AoS)
# NB.  The results reported in the paper are based on running this code with Mosek V8
#      with V9 the results for GH and cars0  fail to reproduce the V8 results.
# Calls the np package to estimate the Klein and Spady model 
require(RCBR)
require(Rmosek)
require(REBayes)
require(mvtnorm)
require(np)

#dat = read.table("Horowitz93.txt")
data(Horowitz93)
dat = Horowitz93
# DCOST: difference in cost (1960 cents) of car versus transit (transit - car)
# CARS: number of cars at home 
# DOVTT: difference in out of vehicle time (transit - car)
# DIVTT: difference in in vehicle time (transit - car)
# DEPEND: choose 1 means car, 0 means transit
pdf("Horowitz93_SingleIndex.pdf", height = 12, width = 10)
par(mfrow=c(3,2))
llstar <- rep(0,3)
llks <- rep(0,3)
llfp <- rep(0,3)
llstargh <- rep(0,3)
bhat <- rep(0,3)
bhatks <- rep(0,3)
bhatksse <- rep(0,3)
bhatgh <- rep(0,3)
bhatghse <- rep(0,3)
bprobit <- matrix(0, 3, 2)
bprobitse <- matrix(0,3,2)
costdiff = seq(0, 1, by = 0.05)
pc <- array(NA, c(3, length(costdiff),3))
timediff = 1:15
pt <-  array(NA, c(3, length(timediff),3))

for (j in c(0,1,2)){
datsub <- dat[dat$CARS==j,]
n = dim(datsub)[1]
# Horowitz did the same normalization of DCOST (compare probit output with the Table 2 in the paper)
X <- cbind(1, datsub$DOVTT, datsub$DCOST/100)  
y = datsub$DEPEND

# probit model fixed coefficient # normalize the coefficient to be 1 for DCOST
fp = glm(DEPEND~ DOVTT + offset(I(DCOST/100)), data = datsub, family = binomial(link = "probit"))  
llfp[j+1] <- logLik(fp)
bprobit[j+1,] <- fp$coeff
bprobitse[j+1,] <- summary(fp)$coeff[,2]

fprobit = glm(DEPEND~ I(DCOST/100)+DOVTT, data = datsub, family = binomial(link = "probit"))


 # Cosslett Single Index 
  	
b <- seq(-0.6,0.4,length=1000)
ll <- b
for (k in 1:length(b)){ 	
sindex <- X[,2] * b[k]+X[,3]
fcoss <- Cosslett(sindex, y, v = 1000)
ll[k] <- fcoss$logL
}
optk <- which.max(ll)
sindex <- X[,2] * b[optk] +X[,3]
fstar <- Cosslett(sindex, y, v = sort(sindex))
bhat[j+1] <- b[optk]
llstar[j+1] <- fstar$logL

# Klein and Spady Gaussian kernel
plot(b, ll, type="l", xlab = "b", ylab = "LL")

abline(v = b[which.max(ll)], lty = 2)



# klein and Spady using np package
bw = npindexbw(DEPEND~I(DCOST/100)+DOVTT, data = datsub,method = "kleinspady",
	       optim.method = "BFGS",nmulti=20,ckertype="gaussian",ckerorder=2)
model.index <- npindex(bw, gradient = TRUE)
bhatks[j+1] <- model.index$beta[2]
llks[j+1] <- - bw$fval*length(y)
bhatksse[j+1] <- sqrt(model.index$betavcov[2,2])

# KW estimator for F using KS estimates for parameters
Xb <- X[,2]*model.index$beta[2] + X[,3]
ffks = Cosslett(Xb, y, v = sort(Xb))  

# GH simple score estimator 
ll2 <- b 
for (k in 1:length(b)){
	ll2[k] = GH(c(b[k], 1), cbind(X[,2], X[,3]), y)
	}

bhatgh[j+1] <- b[which.min(abs(ll2))]
bhatghse[j+1] = GH.se(c(b[which.min(abs(ll2))],1), cbind(X[,2],X[,3]), y, hc = 2)
sindex = X[,2]*b[which.min(abs(ll2))] + X[,3]
fGHstar <- Cosslett(sindex, y, v = sort(sindex))
llstargh[j+1] = fGHstar$logL
if (j==0) Feta0 <- list(x = fGHstar$x, y = fGHstar$y)
if (j ==1) Feta1 <- list(x = fGHstar$x, y= fGHstar$y)
if (j==2) Feta2 <- list(x = fGHstar$x, y = fGHstar$y)

plot(b, ll2, type="l", xlab = "b", ylab = "score")
abline(h = 0, lty=2)
abline(v = b[which.min(abs(ll2))], lty = 2)

# marginal effect 
# fix DOVTT at 75th quantile among those who drive

costdiff = seq(0, 1, by = 0.05)

for (i in 1:length(costdiff)){
    sindex0coss <- quantile(datsub$DOVTT[datsub$DEPEND==1],0.75)*b[which.min(abs(ll2))] + 
    	quantile(datsub$DCOST[datsub$DEPEND==1],0.75)/100
    sindex1coss <- quantile(datsub$DOVTT[datsub$DEPEND==1],0.75)*b[which.min(abs(ll2))] + 
	quantile(datsub$DCOST[datsub$DEPEND==1],0.75)/100-costdiff[i]
    sindex0ks <- quantile(datsub$DOVTT[datsub$DEPEND==1],0.75)*model.index$beta[2] + 
	quantile(datsub$DCOST[datsub$DEPEND==1],0.75)/100
    sindex1ks <- quantile(datsub$DOVTT[datsub$DEPEND==1],0.75)*model.index$beta[2] + 
	quantile(dat$DCOST[dat$DEPEND==1],0.75)/100-costdiff[i]
    pc[1,i,(j+1) ] = sum(fGHstar$y[fGHstar$x<sindex0coss]) - 
	sum(fGHstar$y[fGHstar$x<sindex1coss])
    pc[2,i,(j+1)] <- predict(fprobit, 
	 data.frame(DCOST = quantile(datsub$DCOST[datsub$DEPEND==1],0.75), 
	 DOVTT = quantile(datsub$DOVTT[datsub$DEPEND==1],0.75)  )) - 
	predict(fprobit, data.frame(DCOST =quantile(datsub$DCOST[datsub$DEPEND==1],0.75) - 
	  100* costdiff[i], DOVTT = quantile(datsub$DOVTT[datsub$DEPEND==1],0.75)  ))
    pc[3,i,(j+1)]  = sum(ffks$y[ffks$x<sindex0ks]) - sum(ffks$y[ffks$x<sindex1ks])
}

# fix DCOST at 75 quantile among those who drive
timediff = 1:15
for (i in 1:length(timediff)){
	sindex0coss <- quantile(datsub$DOVTT[datsub$DEPEND==1],0.75)*b[which.min(abs(ll2))] + 
	    quantile(datsub$DCOST[datsub$DEPEND==1],0.75)/100
	sindex1coss <- (quantile(datsub$DOVTT[datsub$DEPEND==1],0.75)-timediff[i])*b[which.min(abs(ll2))] + 
	    quantile(datsub$DCOST[datsub$DEPEND==1],0.75)/100
	sindex0ks <- quantile(datsub$DOVTT[datsub$DEPEND==1],0.75)*model.index$beta[2] + 
	   quantile(datsub$DCOST[datsub$DEPEND==1],0.75)/100
	sindex1ks <- (quantile(datsub$DOVTT[datsub$DEPEND==1],0.75)-timediff[i])*model.index$beta[2] + 
	    quantile(dat$DCOST[dat$DEPEND==1],0.75)/100
	pt[1,i ,(j+1)] = sum(fGHstar$y[fGHstar$x < sindex0coss]) - sum(fGHstar$y[fGHstar$x < sindex1coss])
	pt[2,i,(j+1)] = predict(fprobit, data.frame(DCOST =quantile(datsub$DCOST[datsub$DEPEND==1],0.75), 
	    DOVTT = quantile(datsub$DOVTT[datsub$DEPEND==1],0.75)  )) - 
	    predict(fprobit, data.frame(DCOST =quantile(datsub$DCOST[datsub$DEPEND==1],0.75), 
		DOVTT = quantile(datsub$DOVTT[datsub$DEPEND==1],0.75) - timediff[i] ))
	pt[3,i,(j+1)] = sum(ffks$y[ffks$x<sindex0ks]) - sum(ffks$y[ffks$x<sindex1ks])
	}
}
dev.off()

pdf("MarginalEffect_singleindex.pdf",height = 12, width = 10)
par(mfcol = c(2,3))
for(k in 1:3){
    plot(costdiff,pc[1,,k],ylim = range(pc[1,,k],pc[2,,k], pc[3,,k]),
	 xlab = "reduction in fare ($)", ylab = "Change in transit probability")
	 points(costdiff, pc[2,,k], col=2, pch = 4)
	 points(costdiff, pc[3,,k], col=3, pch = 20)
	 legend("topleft", c("NPMLE", "KS", "Probit"), col = c(1, 3, 2), pch = c(1, 20, 4))
    plot(timediff,pt[1,,k],ylim = range(pt[1,,k],pt[2,,k], pt[3,,k]), 
	xlab = "reduction transit time (min)", ylab = "Change in transit probability")
	points(timediff,pt[2,,k],col=2,pch  =4)
	points(timediff,pt[3,,k],col=3,pch  =20)
	legend("topleft", c("NPMLE", "KS", "Probit"), col = c(1, 3, 2), pch = c(1, 20, 4))
}
dev.off()



best <- t(rbind(bhatgh, bhatghse, llstargh,bhatks, bhatksse, llks, bprobit[,2],bprobitse[,2], llfp))
rownames(best)<-c("0car","1car","2car")
colnames(best) = c("eta2_GH", "GH_se", "loglik", "eta2_KS", "KS_se", "loglik", "eta2_probit","probit_se", "loglik")

require(xtable)
xtable(best, digits = 3)

