"htiv" <- function(x, y, id, d, z = NULL){ # x design matrix partitioned as given in d # y response vector # id strata indicator # d list of column numbers indicating partitioning of x # x[,d[[1]]] is x1 -- exogonous time varying vars # x[,d[[2]]] is x2 -- endogonous time varying vars # x[,d[[3]]] is z1 -- exogonous time invariant vars # x[,d[[4]]] is z2 -- endogonous time invariant vars # z may contain excluded exogonous variables if there are any # NB. intercept is automatically included x <- as.matrix(cbind(x, 1)) Tx <- PQ(x, id) d[[3]] <- c(d[[3]], dim(x)[2]) Z <- cbind(z, Tx$Ph[, d[[1]]], Tx$Qh[, d[[1]]], Tx$Qh[, d[[2]]],x[, d[[ 3]]]) # instruments r <- tsls(x, Z, y, int = F)$resid Ti <- table(id) Ti.inv <- 1/table(id) rdot2 <- tapply(r, id, mean)^2 v <- lm(rdot2~Ti.inv) v <- v$coef theta <- as.vector(sqrt(v[2]/(v[2] + v[1] * Ti[Tx$is]))) x <- x - (1 - theta) * Tx$Ph y <- y - (1 - theta) * PQ(y, id)$Ph fit <- tsls(x, Z, y, int = F) list(fit=fit,v=v) } "PQ" <- function(h, id){ if(is.vector(h)) h <- matrix(h, ncol = 1) Ph <- unique(id) Ph <- cbind(Ph, table(id)) for(i in 1:ncol(h)) Ph <- cbind(Ph, tapply(h[, i], id, mean)) is <- tapply(id, id) Ph <- Ph[is, - (1:2)] Qh <- h - Ph list(Ph=as.matrix(Ph), Qh=as.matrix(Qh), is=is) } "tsls" <-function(x, z, y, int = T){ # two stage least squares if(int){ x <- cbind(1, x) z <- cbind(1, z) } xhat <- lm(x~z-1)$fitted.values R <- lm(y ~ xhat -1) R$residuals <- c(y - x %*% R$coef) return(R) } #a simple analysis of wages d <- read.table("phuzics.txt",header=TRUE) # id : person identifier # sex : gender (female==1) # y : page equivalent in current year # yr : current year - 1900 # rphd: rank of PhD # Y : discounted cumulative page equivalent # phd : year of PhD - 1900 # ru : dummy for research university(res.pos.==1) # s : current annual salary n <- length(d[,1]) h <- as.matrix(d) h <- cbind(h[3:n,],h[3:n,]-h[2:(n-1),],h[2:(n-1),]-h[1:(n-2),]) #difference h <- h[h[,10]==0,] #ignore obs whose first diff confounds people h <- h[h[,19]==0,] #ignore obs whose second diff confounds people h <- cbind(h[,1:9],h[,7:9]-h[,16:18],h[,7:9]-h[,16:18]-h[,25:27]) h <- cbind(h[,1:4],h[,2]-h[,3],(h[,2]-h[,3])^2,h[,5:15]) dimnames(h)[[2]] <- c("id","yr","phd","sex","exp","exp^2","rphd","ru", "y","Y","s","y <- 1","Y_1","s_1","y <- 2","Y_2","s_2") #h <- h[h[,1]<124,] #fit productivity equation by Hausman Taylor Method y <- log(h[,9]) x <- h[,c(12,15,5:7,3,4)] x[,1:2] <- log(x[,1:2]) #x[,5] <- 1/x[,5]#rank of phd program x[,6] <- as.numeric(x[,6]>60) #vintage effect of phd x[,5] <- 1/(x[,3]*x[,5])#rank of phd program id <- h[,1] dimnames(x)[[2]] <- c("y","y_1","exp","exp2","rphd","phd","sex") vl <- list(3:4,c(1:2,5),6:7,NULL) v <- htiv(x,y,id,vl) print("these are the variance component estimates") print(v$v) print(summary(v$fit))