n <- 100 w <- rnorm(n) e <- rnorm(n) x1 <- rnorm(n) x2 <- .5*w+rbinom(n,1,prob=0.4)*e cor(x2,e) cor(w,e) cor(x2,w) y <- 1 + x1 + x2 + e lm(y~x1+x2) # 1. IV wx <- t(cbind(1,x1,w)) %*% cbind(1,x1,x2) wy <- t(cbind(1,x1,w)) %*% y b_iv <- solve(wx) %*% wy b_iv # 2. 2SLS x2hat <- lm(x2~x1+w)$fitted b_2sls <- lm(y~x1+x2hat) b_2sls # When # of instrumental variables > # of endo. variables # use 2SLS ww <- rbinom(n,1,prob=0.5)*w + .5*rnorm(n) cor(ww,x2) cor(ww,e) x2hat <- lm(x2 ~ x1 + w + ww)$fitted lm(y~x1 + x2hat) # two stage least squares tsls <-function(x, z, y, int = T){ if(int){ x <- cbind(1, x) z <- cbind(1, z) } xhat <- lm(x~z-1)$fitted.values R <- lm(y ~ xhat -1) R$residuals <- c(y - x %*% R$coef) return(R) } tsls(cbind(x1,x2),cbind(x1,w),y) tsls(cbind(x1,x2),cbind(x1,w,ww),y) # 3. Hausman test Y <- rbinom(n,1,prob=0.5)*e + 0.4*w y <- 1 + Y + x1 + x2 + e lm(y~Y+x1+x2) W0 <- cbind(x1,x2,w,ww) W1 <- cbind(x1,w,ww) d0<- tsls(cbind(Y,x1,x2),W0,y) d1<- tsls(cbind(Y,x1,x2),W1,y) d0 d1 delta <- d0$coef - d1$coef Vd <- vcov(d1) - vcov(d0) Hausman <- delta %*% solve(Vd) %*% delta # if you want to do it in a correct way var.H <- function(m,z,w, int = T){ if(int){ w <- cbind(1,w) z <- cbind(1,z) } s <- crossprod(m$resid,m$resid)/length(m$resid) Px <- w%*%solve(t(w)%*%w)%*%t(w) vcov <- as.numeric(s)*solve(t(z)%*% Px %*% z) return(vcov) } v0 <- var.H(d0,cbind(Y,x1,x2),W0) v1 <- var.H(d1,cbind(Y,x1,x2),W1) Vd <- v1-v0 Hausman <- delta %*% solve(Vd) %*% delta