Simulation described in Freedman text, bottom p. 191 - top p. 192 (note: page 199 in the revised version).
Scan of pages 191-192 at http://statweb.stanford.edu/~rag/stat209/dafp191.pdf

> library(MASS)
> ?mvrnorm # shows you the help file; easiest way to create artificial data
> # Freedman pp. 191-2: simulate 100 obs; make the correlation between delta and epsilon .3
> zeddelteps = mvrnorm(n=100, mu = c(0,0,0), Sigma=
+ matrix(nrow = 3, ncol = 3, data = c(3,0,0,0,1,.3,0,.3,1), byrow = T), empirical = TRUE)
> cor(zeddelteps)
              [,1]         [,2]          [,3]
[1,]  1.000000e+00 2.661070e-16 -3.463923e-16
[2,]  2.661070e-16 1.000000e+00  3.000000e-01
[3,] -3.463923e-16 3.000000e-01  1.000000e+00
> # could also just take what I get from mvrnorm; to get close, go up to 400 at least
> xsim = .75*zeddelteps[,1] + zeddelteps[,2] # C = .75 set
> cor(xsim, zeddelteps[,1])
[1] 0.7924058
> ysim = .5*xsim + zeddelteps[,3] # beta = .5 set
> cor(ysim,xsim)
[1] 0.7140388
> ols = lm(ysim ~ xsim)
> summary(ols)

Call:
lm(formula = ysim ~ xsim)

Residuals:
    Min      1Q  Median      3Q     Max
-3.8382 -0.4933  0.0961  0.6177  1.8380

Coefficients:
              Estimate Std. Error   t value Pr(>|t|)
(Intercept) -2.542e-16  9.881e-02 -2.57e-15        1
xsim         6.116e-01  6.058e-02     10.10   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9881 on 98 degrees of freedom
Multiple R-Squared: 0.5099,     Adjusted R-squared: 0.5048
F-statistic: 101.9 on 1 and 98 DF,  p-value: < 2.2e-16

> .1116/sqrt(.06058) # so we have a bit of bias from this correlation
[1] 0.4534188
[Note: .1116 = .6116 - .5 is the OLS bias. Since .06058 is already a standard error
 (not a variance), the bias in standard-error units is presumably meant to be
 .1116/.06058 = 1.84.]
> cor(xsim,zeddelteps[,3])
[1] 0.1829983
> cov(ysim, zeddelteps[,1])/cov(xsim, zeddelteps[,1]) # IV
[1] 0.5
> # so the IV estimate gets beta
> # do 2SLS version of IV
> xpred = lm(xsim ~ zeddelteps[,1])
> stage2 = lm(ysim ~ fitted(xpred))
> summary(stage2)

Call:
lm(formula = ysim ~ fitted(xpred))

Residuals:
    Min      1Q  Median      3Q     Max
-3.2864 -0.9196  0.1151  0.8456  3.3366

Coefficients:
                Estimate Std. Error   t value Pr(>|t|)
(Intercept)   -4.441e-17  1.251e-01 -3.55e-16        1
fitted(xpred)  5.000e-01  9.681e-02     5.165 1.27e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.251 on 98 degrees of freedom
Multiple R-Squared: 0.2139,     Adjusted R-squared: 0.2059
F-statistic: 26.67 on 1 and 98 DF,  p-value: 1.266e-06

The 2SLS slope (.5) matches the IV estimate.
> # even in this benign case IV via 2SLS shows a standard error roughly 60% larger
> # than OLS (.0968 vs .0606); enough to offset the (slight) bias of OLS???
[Note: the standard error printed by the second-stage lm() is computed from the
 residuals ysim - .5*fitted(xpred), not from the IV residuals ysim - .5*xsim.
 Here ysim - .5*xsim is just zeddelteps[,3], whose sample variance is exactly 1
 (empirical = TRUE), so the proper residual standard error is about 1.005 rather
 than 1.251, and the proper IV standard error is approximately .078, i.e. roughly
 28% larger than the OLS standard error rather than 60%.]
==============================