# outcome analysis: optmatch fullmatch, lalonde data # see week 1 RQ7 for balance checks R version 3.5.3 (2019-03-11) -- "Great Truth" Copyright (C) 2019 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 (64-bit) > library(optmatch) > data(lalonde) > dim(lalonde) [1] 614 10 > library(cobalt) Attaching package: ‘cobalt’ The following object is masked _by_ ‘.GlobalEnv’: lalonde The following object is masked from ‘package:MatchIt’: lalonde > head(lalonde) treat age educ black hispan married nodegree re74 re75 re78 NSW1 1 37 11 1 0 1 1 0 0 9930.0460 NSW2 1 22 9 0 1 0 1 0 0 3595.8940 NSW3 1 30 12 1 0 0 0 0 0 24909.4500 NSW4 1 27 11 1 0 0 1 0 0 7506.1460 NSW5 1 33 8 1 0 0 1 0 0 289.7899 NSW6 1 22 9 1 0 0 1 0 0 4056.4940 > ##### > ###now try optmatch > # cobalt vignette does it this way > pfit = glm(f.build("treat", covs), data = lalonde, family = "binomial") > lalonde$p.score = pfit$fitted.values #get the propensity score > boxplot(lalonde$p.score ~ lalonde$treat) # propensity score > fm2 = fullmatch(treat ~ p.score, data = lalonde) > bal.tab(fm2, formula = f.build("treat", covs), data = lalonde) Call fullmatch(x = treat ~ p.score, data = lalonde) Balance Measures Type Diff.Adj age Contin. 0.1494 educ Contin. -0.0364 black Binary 0.0086 hispan Binary -0.0013 married Binary 0.0579 nodegree Binary 0.0092 re74 Contin. -0.0636 re75 Contin. -0.0124 Sample sizes Control Treated All 429 185 Matched 429 185 > love.plot(bal.tab(fm2, formula = f.build("treat", covs), data = lalonde)) > # close, but not quite the same balance table as MatchIt(full); 102 subgroups, 104 from Matchit(full) > summary(fm2) Structure of matched sets: 5+:1 4:1 3:1 2:1 1:1 1:2 1:3 1:4 1:5+ 8 2 6 16 39 7 4 5 15 Effective Sample Size: 137.4 (equivalent number of matched pairs). > # looks like we got 102 subclasses here, 104 from MatchIt(full) > stratumStructure(fm2) 12:1 9:1 8:1 7:1 6:1 5:1 4:1 3:1 2:1 1:1 1:2 1:3 1:4 1:5 1:6 1:7 1:8 1:9 1:12 1:13 1 1 1 1 1 3 2 6 16 39 7 4 5 1 1 1 1 1 3 3 1:20 1:41 1:51 1:90 1 1 1 1 ## what we need for outcome analysis is to add the subclass info for each unit to the lalonde dataset ## In MatchIt matched.data does this for us # here we grab a factor giving us the subclass info # I call the augmented lalonde dataset "matched" > matched = cbind(lalonde, matches = fm2) > head(matched) treat age educ black hispan married nodegree re74 re75 re78 p.score matches NSW1 1 37 11 1 0 1 1 0 0 9930.0460 0.6387699 1.1 NSW2 1 22 9 0 1 0 1 0 0 3595.8940 0.2246342 1.98 NSW3 1 30 12 1 0 0 0 0 0 24909.4500 0.6782439 1.109 NSW4 1 27 11 1 0 0 1 0 0 7506.1460 0.7763241 1.120 NSW5 1 33 8 1 0 0 1 0 0 289.7899 0.7016387 1.131 NSW6 1 22 9 1 0 0 1 0 0 4056.4940 0.6990699 1.142 > matched[1:20,] treat age educ black hispan married nodegree re74 re75 re78 p.score matches NSW1 1 37 11 1 0 1 1 0 0 9930.0460 0.63876993 1.1 NSW2 1 22 9 0 1 0 1 0 0 3595.8940 0.22463424 1.98 NSW3 1 30 12 1 0 0 0 0 0 24909.4500 0.67824388 1.109 NSW4 1 27 11 1 0 0 1 0 0 7506.1460 0.77632408 1.120 NSW5 1 33 8 1 0 0 1 0 0 289.7899 0.70163874 1.131 NSW6 1 22 9 1 0 0 1 0 0 4056.4940 0.69906990 1.142 NSW7 1 23 12 1 0 0 0 0 0 0.0000 0.65368426 1.153 NSW8 1 32 11 1 0 0 1 0 0 8472.1580 0.78972311 1.164 NSW9 1 22 16 1 0 0 0 0 0 2164.0220 0.77983825 1.120 NSW10 1 33 12 0 0 1 0 0 0 12418.0700 0.04292461 1.2 NSW11 1 19 9 1 0 0 1 0 0 8173.9080 0.68901996 1.13 NSW12 1 21 13 1 0 0 0 0 0 17094.6400 0.68244400 1.24 NSW13 1 18 8 1 0 0 1 0 0 0.0000 0.64986767 1.35 NSW14 1 27 10 1 0 1 1 0 0 18739.9300 0.56241073 1.46 NSW15 1 17 7 1 0 0 1 0 0 3023.8790 0.60858629 1.57 NSW16 1 19 10 1 0 0 1 0 0 3228.5030 0.72249036 1.68 NSW17 1 27 13 1 0 0 0 0 0 14581.8600 0.70259562 1.131 NSW18 1 23 10 1 0 0 1 0 0 7693.4000 0.73496416 1.90 NSW19 1 40 12 1 0 0 0 0 0 10804.3200 0.71166489 1.97 NSW20 1 26 12 1 0 0 0 0 0 10747.3500 0.66431981 1.99 > table(matched$matches) 1.1 1.100 1.101 1.102 1.107 1.108 1.109 1.11 1.113 1.114 1.118 1.119 1.120 1.121 1.122 1.123 3 13 2 7 5 3 2 2 8 3 2 2 9 3 3 10 1.124 1.125 1.126 1.129 1.13 1.131 1.132 1.133 1.134 1.135 1.137 1.138 1.139 1.14 1.140 1.141 5 14 4 2 2 6 2 3 4 3 2 3 4 4 4 3 1.142 1.143 1.145 1.148 1.151 1.152 1.153 1.154 1.155 1.157 1.16 1.160 1.162 1.164 1.166 1.167 2 2 3 2 13 5 2 3 2 6 5 3 2 10 2 3 1.168 1.17 1.170 1.172 1.174 1.176 1.182 1.183 1.185 1.19 1.2 1.20 1.22 1.23 1.24 1.26 8 3 3 14 4 2 14 2 3 2 21 42 2 3 2 2 1.28 1.29 1.3 1.30 1.31 1.34 1.35 1.37 1.40 1.43 1.44 1.46 1.47 1.49 1.52 1.53 2 91 5 2 2 13 3 3 3 52 2 5 6 2 13 2 1.57 1.60 1.63 1.68 1.7 1.71 1.72 1.75 1.76 1.82 1.85 1.87 1.89 1.90 1.91 1.92 4 2 2 7 3 2 2 3 2 2 2 3 9 4 2 4 1.93 1.94 1.96 1.97 1.98 1.99 2 5 2 2 4 6 > length(table(matched$matches)) [1] 102 > library(lme4) Loading required package: Matrix > str(matched) 'data.frame': 614 obs. of 13 variables: $ treat : int 1 1 1 1 1 1 1 1 1 1 ... $ age : int 37 22 30 27 33 22 23 32 22 33 ... $ educ : int 11 9 12 11 8 9 12 11 16 12 ... $ black : int 1 0 1 1 1 1 1 1 1 0 ... $ hispan : int 0 1 0 0 0 0 0 0 0 0 ... $ married : int 1 0 0 0 0 0 0 0 0 1 ... $ nodegree: int 1 1 0 1 1 1 0 1 0 0 ... $ re74 : num 0 0 0 0 0 0 0 0 0 0 ... $ re75 : num 0 0 0 0 0 0 0 0 0 0 ... $ re78 : num 9930 3596 24909 7506 290 ... $ p.score : num 0.639 0.225 0.678 0.776 0.702 ... $ matches : Factor w/ 102 levels "1.1","1.100",..: 1 101 7 13 22 33 39 46 13 59 ... # so now we can use the factor matches just like we used subclass from MatchIt # lmer isn't that numerically happy, but we get about the same result > optmatch_lmer2 = lmer(re78 ~ treat + (1 + treat|matches), data = matched) Warning message: In checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.00776755 (tol = 0.002, component 1) > summary(optmatch_lmer2) Linear mixed model fit by REML ['lmerMod'] Formula: re78 ~ treat + (1 + treat | matches) Data: matched REML criterion at convergence: 12634.7 Scaled residuals: Min 1Q Median 3Q Max -1.5112 -0.7597 -0.2716 0.5129 7.4641 Random effects: Groups Name Variance Std.Dev. Corr matches (Intercept) 3476596 1865 treat 7026358 2651 -1.00 Residual 51450478 7173 Number of obs: 614, groups: matches, 102 Fixed effects: Estimate Std. Error t value (Intercept) 5875.6 491.7 11.950 treat 464.6 743.7 0.625 Correlation of Fixed Effects: (Intr) treat -0.691 convergence code: 0 Model failed to converge with max|grad| = 0.00776755 (tol = 0.002, component 1)