# Load the panel data; the first line of the file holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
greene <- read.table("greene14.txt", header = TRUE)

# Stop right away if a column this script relies on is absent, instead of
# failing later with an "object not found" error (or silently picking up an
# unrelated global of the same name).
stopifnot(
  all(c("Firm", "Cost", "Output", paste0("D", 1:6)) %in% names(greene))
)

greene

# attach() is kept on purpose: the later sections refer to Firm and D1..D6
# as bare names and would break without it.
attach(greene)

# Log cost and log output, read explicitly from the data frame so the values
# cannot be shadowed by another object on the search path.
lnc <- log(greene$Cost)
lny <- log(greene$Output)

# 1. Pooled OLS

# Regression of log cost on log output, ignoring the firm structure.
pols <- lm(lnc ~ lny)

pols


# 2. Within and Between Estimators

# Split panel data into group means and deviations from group means.
#
# Arguments:
#   x   numeric vector or matrix with one row per observation.
#   id  group identifier for each observation (coerced to a factor).
#       Observations may appear in any order.
#
# Returns a list with
#   xm   matrix of group means, one row per level of id (rows named by level,
#        columns as in x);
#   xdm  matrix shaped like x holding each observation minus the mean of its
#        own group.
panmat <- function(x, id) {
  x <- as.matrix(x)
  id <- as.factor(id)
  stopifnot(length(id) == nrow(x))

  group_means <- apply(x, 2, function(column) tapply(column, id, mean))

  # apply() collapses its result to a plain vector when there is only one
  # group, so the matrix shape is rebuilt explicitly.
  xm <- matrix(
    group_means,
    nrow = nlevels(id),
    ncol = ncol(x),
    dimnames = list(levels(id), colnames(x))
  )

  # Pick each row's own group mean through the factor codes. This stays
  # correct when the rows are not sorted by id, which repeating the means
  # block-wise (rep(mean, table(id))) does not.
  xdm <- x - xm[as.integer(id), , drop = FALSE]

  list(xm = xm, xdm = xdm)
}

# Firm-level decomposition of log cost and log output:
#   *_W  deviation of each observation from its firm mean (within part),
#   *_B  one mean per firm (between part).
lnc_W <- panmat(lnc, Firm)[["xdm"]]
lny_W <- panmat(lny, Firm)[["xdm"]]
lnc_B <- panmat(lnc, Firm)[["xm"]]
lny_B <- panmat(lny, Firm)[["xm"]]

# Number of observations in the within series ...
length(lnc_W)

# ... and number of firm means in the between series.
length(lnc_B)


# 2.1. Within Estimators (Fixed Effects)

# Dummy-variable form: common slope on lny, one intercept per dummy D1..D6,
# no overall constant.
fixed <- lm(formula = lnc ~ 0 + lny + D1 + D2 + D3 + D4 + D5 + D6)

fixed

# Same slope from the demeaned data, again without a constant.
within <- lm(formula = lnc_W ~ 0 + lny_W)

within


# anova(pols, fixed)


# 2.2. Between Estimator

# Regression on the firm means only.
between <- lm(formula = lnc_B ~ lny_B)

between


# 3. GLS (Random Effects)

# Number of observations per firm, taken from the data instead of a
# hard-coded constant. The single-theta transformation below is only valid
# for a balanced panel, so an unbalanced one is rejected explicitly.
obs_per_firm <- as.vector(table(Firm))
stopifnot(
  length(obs_per_firm) > 0,
  all(obs_per_firm == obs_per_firm[1])
)
n_periods <- obs_per_firm[1]

# Residuals of the within regression (centred) and of the between regression.
eu <- residuals(within) - mean(residuals(within))

eb <- residuals(between)

# Variance components: idiosyncratic error (sig_u2) and firm effect (sig_a2).
# The between residual variance estimates sig_a2 + sig_u2 / n_periods.
sig_u2 <- var(eu)

# A negative estimate can occur in small samples; it is truncated at zero,
# which makes theta equal to 1 and the GLS fit collapse to pooled OLS.
sig_a2 <- max(var(eb) - sig_u2 / n_periods, 0)

theta <- sqrt(sig_u2 / (sig_u2 + n_periods * sig_a2))


# 3.1. GLS transformation

# Map every observation to the row of its firm in the matrices of firm means
# (rows are named by firm), so the transformation does not depend on the
# rows being sorted by firm.
firm_row <- match(as.character(Firm), rownames(lnc_B))
stopifnot(!anyNA(firm_row))

# Quasi-demeaning: subtract the fraction (1 - theta) of the firm mean.
lnc_G <- lnc - (1 - theta) * unname(lnc_B[firm_row, 1])

lny_G <- lny - (1 - theta) * unname(lny_B[firm_row, 1])

gls <- lm(lnc_G ~ lny_G)


# 4. Fixed or Random? Hausman test

# Sampling variances of the three slope estimates.
# The within variance is read from the dummy-variable fit `fixed`, not from
# the demeaned regression `within`: lm() on demeaned data does not count the
# firm intercepts in its residual degrees of freedom and therefore
# understates the variance. (Assumes D1..D6 are the firm dummies, so that
# both fits share the same slope -- confirm against the data.)
var_within <- vcov(fixed)["lny", "lny"]
var_gls <- vcov(gls)[2, 2]
var_between <- vcov(between)[2, 2]

# 4.1. Within versus GLS.
# Under the null GLS is efficient, so the variance of the contrast is the
# difference of the two variances. In small samples this difference can come
# out non-positive, in which case H1 is not interpretable.
w1 <- coef(gls)[2] - coef(within)
V1 <- var_within - var_gls
H1 <- (w1^2) / V1

# 4.2. Within versus between.
# The two estimators use orthogonal parts of the data (deviations from firm
# means versus firm means) and are uncorrelated, so the variance of their
# difference is the SUM of the two variances, not the difference.
w2 <- coef(within) - coef(between)[2]
V2 <- var_between + var_within
H2 <- (w2^2) / V2

# Both statistics compare a single coefficient: chi-squared with 1 df.
p_H1 <- pchisq(H1, df = 1, lower.tail = FALSE)
p_H2 <- pchisq(H2, df = 1, lower.tail = FALSE)