n <- 100
w <- rnorm(n)
e <- rnorm(n)
x1 <- rnorm(n)
x2 <- .5*w+rbinom(n,1,prob=0.4)*e


cor(x2,e)
cor(w,e)
cor(x2,w)

y <- 1 + x1 + x2 + e

lm(y~x1+x2)


# 1. IV

wx <- t(cbind(1,x1,w)) %*% cbind(1,x1,x2)

wy <- t(cbind(1,x1,w)) %*% y

b_iv <- solve(wx) %*% wy

b_iv


# 2. 2SLS

x2hat <- lm(x2~x1+w)$fitted

b_2sls <- lm(y~x1+x2hat)

b_2sls


# When # of instrumental variables > # of endo. variables 
# use 2SLS

ww <- rbinom(n,1,prob=0.5)*w + .5*rnorm(n)

cor(ww,x2)

cor(ww,e)

x2hat <- lm(x2 ~ x1 + w + ww)$fitted

lm(y~x1 + x2hat)


# two stage least squares

tsls <-function(x, z, y, int = T){
  if(int){
       x <- cbind(1, x)
       z <- cbind(1, z)
      }
  xhat <- lm(x~z-1)$fitted.values
  R <- lm(y ~ xhat -1)
  R$residuals <- c(y - x %*% R$coef)
 return(R)
}


tsls(cbind(x1,x2),cbind(x1,w),y)

tsls(cbind(x1,x2),cbind(x1,w,ww),y)


# 3. Hausman test

Y <- rbinom(n,1,prob=0.5)*e + 0.4*w

y <- 1 + Y + x1 + x2 + e

lm(y~Y+x1+x2)

W0 <- cbind(x1,x2,w,ww)

W1 <- cbind(x1,w,ww)

d0<- tsls(cbind(Y,x1,x2),W0,y)

d1<- tsls(cbind(Y,x1,x2),W1,y)

d0

d1

delta <- d0$coef - d1$coef

Vd <- vcov(d1) - vcov(d0)

Hausman <- delta %*% solve(Vd) %*%  delta


# if you want to do it in a correct way

var.H <- function(m,z,w, int = T){
  if(int){
   w  <- cbind(1,w)
   z  <- cbind(1,z)
  }
  s  <- crossprod(m$resid,m$resid)/length(m$resid)
  Px <- w%*%solve(t(w)%*%w)%*%t(w)
  vcov <- as.numeric(s)*solve(t(z)%*% Px %*% z)
 return(vcov)
}


v0 <- var.H(d0,cbind(Y,x1,x2),W0)
v1 <- var.H(d1,cbind(Y,x1,x2),W1)

Vd <- v1-v0

Hausman <- delta %*% solve(Vd) %*%  delta