# outcome analysis: optmatch fullmatch, lalonde data
# see week 1 RQ7 for balance checks

R version 3.5.3 (2019-03-11) -- "Great Truth"
Copyright (C) 2019 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64 (64-bit)
> library(optmatch)

> data(lalonde)
> dim(lalonde)
[1] 614  10
> library(cobalt)

Attaching package: ‘cobalt’

The following object is masked _by_ ‘.GlobalEnv’:

    lalonde

The following object is masked from ‘package:MatchIt’:

    lalonde

> head(lalonde)
     treat age educ black hispan married nodegree re74 re75       re78
NSW1     1  37   11     1      0       1        1    0    0  9930.0460
NSW2     1  22    9     0      1       0        1    0    0  3595.8940
NSW3     1  30   12     1      0       0        0    0    0 24909.4500
NSW4     1  27   11     1      0       0        1    0    0  7506.1460
NSW5     1  33    8     1      0       0        1    0    0   289.7899
NSW6     1  22    9     1      0       0        1    0    0  4056.4940

> #####
> ###now try optmatch

> # cobalt vignette does it this way
> pfit = glm(f.build("treat", covs), data = lalonde, family = "binomial")
> lalonde$p.score = pfit$fitted.values #get the propensity score
> boxplot(lalonde$p.score ~ lalonde$treat) # propensity score
> fm2 = fullmatch(treat ~ p.score, data = lalonde)
> bal.tab(fm2, formula = f.build("treat", covs), data = lalonde)
Call
 fullmatch(x = treat ~ p.score, data = lalonde)

Balance Measures
            Type Diff.Adj
age      Contin.   0.1494
educ     Contin.  -0.0364
black     Binary   0.0086
hispan    Binary  -0.0013
married   Binary   0.0579
nodegree  Binary   0.0092
re74     Contin.  -0.0636
re75     Contin.  -0.0124

Sample sizes
        Control Treated
All         429     185
Matched     429     185

> love.plot(bal.tab(fm2, formula = f.build("treat", covs), data = lalonde))
> # close, but not quite the same balance table as MatchIt(full); 102 subgroups, 104 from Matchit(full)

> summary(fm2)
Structure of matched sets:
5+:1  4:1  3:1  2:1  1:1  1:2  1:3  1:4 1:5+ 
   8    2    6   16   39    7    4    5   15 
Effective Sample Size:  137.4 
(equivalent number of matched pairs).

> # looks like we got 102 subclasses here, 104 from MatchIt(full)
> stratumStructure(fm2)
12:1  9:1  8:1  7:1  6:1  5:1  4:1  3:1  2:1  1:1  1:2  1:3  1:4  1:5  1:6  1:7  1:8  1:9 1:12 1:13 
   1    1    1    1    1    3    2    6   16   39    7    4    5    1    1    1    1    1    3    3 
1:20 1:41 1:51 1:90 
   1    1    1    1 

## what we need for outcome analysis is to add the subclass info for each unit to the lalonde dataset
## In MatchIt matched.data does this for us
# here we grab a factor giving us the subclass info
# I call the augmented lalonde dataset "matched"

> matched = cbind(lalonde, matches = fm2)
> head(matched)
     treat age educ black hispan married nodegree re74 re75       re78   p.score matches
NSW1     1  37   11     1      0       1        1    0    0  9930.0460 0.6387699     1.1
NSW2     1  22    9     0      1       0        1    0    0  3595.8940 0.2246342    1.98
NSW3     1  30   12     1      0       0        0    0    0 24909.4500 0.6782439   1.109
NSW4     1  27   11     1      0       0        1    0    0  7506.1460 0.7763241   1.120
NSW5     1  33    8     1      0       0        1    0    0   289.7899 0.7016387   1.131
NSW6     1  22    9     1      0       0        1    0    0  4056.4940 0.6990699   1.142
> matched[1:20,]
      treat age educ black hispan married nodegree re74 re75       re78    p.score matches
NSW1      1  37   11     1      0       1        1    0    0  9930.0460 0.63876993     1.1
NSW2      1  22    9     0      1       0        1    0    0  3595.8940 0.22463424    1.98
NSW3      1  30   12     1      0       0        0    0    0 24909.4500 0.67824388   1.109
NSW4      1  27   11     1      0       0        1    0    0  7506.1460 0.77632408   1.120
NSW5      1  33    8     1      0       0        1    0    0   289.7899 0.70163874   1.131
NSW6      1  22    9     1      0       0        1    0    0  4056.4940 0.69906990   1.142
NSW7      1  23   12     1      0       0        0    0    0     0.0000 0.65368426   1.153
NSW8      1  32   11     1      0       0        1    0    0  8472.1580 0.78972311   1.164
NSW9      1  22   16     1      0       0        0    0    0  2164.0220 0.77983825   1.120
NSW10     1  33   12     0      0       1        0    0    0 12418.0700 0.04292461     1.2
NSW11     1  19    9     1      0       0        1    0    0  8173.9080 0.68901996    1.13
NSW12     1  21   13     1      0       0        0    0    0 17094.6400 0.68244400    1.24
NSW13     1  18    8     1      0       0        1    0    0     0.0000 0.64986767    1.35
NSW14     1  27   10     1      0       1        1    0    0 18739.9300 0.56241073    1.46
NSW15     1  17    7     1      0       0        1    0    0  3023.8790 0.60858629    1.57
NSW16     1  19   10     1      0       0        1    0    0  3228.5030 0.72249036    1.68
NSW17     1  27   13     1      0       0        0    0    0 14581.8600 0.70259562   1.131
NSW18     1  23   10     1      0       0        1    0    0  7693.4000 0.73496416    1.90
NSW19     1  40   12     1      0       0        0    0    0 10804.3200 0.71166489    1.97
NSW20     1  26   12     1      0       0        0    0    0 10747.3500 0.66431981    1.99
> table(matched$matches)

  1.1 1.100 1.101 1.102 1.107 1.108 1.109  1.11 1.113 1.114 1.118 1.119 1.120 1.121 1.122 1.123 
    3    13     2     7     5     3     2     2     8     3     2     2     9     3     3    10 
1.124 1.125 1.126 1.129  1.13 1.131 1.132 1.133 1.134 1.135 1.137 1.138 1.139  1.14 1.140 1.141 
    5    14     4     2     2     6     2     3     4     3     2     3     4     4     4     3 
1.142 1.143 1.145 1.148 1.151 1.152 1.153 1.154 1.155 1.157  1.16 1.160 1.162 1.164 1.166 1.167 
    2     2     3     2    13     5     2     3     2     6     5     3     2    10     2     3 
1.168  1.17 1.170 1.172 1.174 1.176 1.182 1.183 1.185  1.19   1.2  1.20  1.22  1.23  1.24  1.26 
    8     3     3    14     4     2    14     2     3     2    21    42     2     3     2     2 
 1.28  1.29   1.3  1.30  1.31  1.34  1.35  1.37  1.40  1.43  1.44  1.46  1.47  1.49  1.52  1.53 
    2    91     5     2     2    13     3     3     3    52     2     5     6     2    13     2 
 1.57  1.60  1.63  1.68   1.7  1.71  1.72  1.75  1.76  1.82  1.85  1.87  1.89  1.90  1.91  1.92 
    4     2     2     7     3     2     2     3     2     2     2     3     9     4     2     4 
 1.93  1.94  1.96  1.97  1.98  1.99 
    2     5     2     2     4     6 
> length(table(matched$matches))
[1] 102

> library(lme4)
Loading required package: Matrix

> str(matched)
'data.frame':   614 obs. of  13 variables:
 $ treat   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ age     : int  37 22 30 27 33 22 23 32 22 33 ...
 $ educ    : int  11 9 12 11 8 9 12 11 16 12 ...
 $ black   : int  1 0 1 1 1 1 1 1 1 0 ...
 $ hispan  : int  0 1 0 0 0 0 0 0 0 0 ...
 $ married : int  1 0 0 0 0 0 0 0 0 1 ...
 $ nodegree: int  1 1 0 1 1 1 0 1 0 0 ...
 $ re74    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ re75    : num  0 0 0 0 0 0 0 0 0 0 ...
 $ re78    : num  9930 3596 24909 7506 290 ...
 $ p.score : num  0.639 0.225 0.678 0.776 0.702 ...
 $ matches : Factor w/ 102 levels "1.1","1.100",..: 1 101 7 13 22 33 39 46 13 59 ...

# so now we can use the factor matches just like we used subclass from MatchIt
# lmer isn't that numerically happy, but we get about the same result

> optmatch_lmer2 = lmer(re78 ~ treat + (1 + treat|matches), data = matched)
Warning message:
In checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv,  :
  Model failed to converge with max|grad| = 0.00776755 (tol = 0.002, component 1)
> summary(optmatch_lmer2)
Linear mixed model fit by REML ['lmerMod']
Formula: re78 ~ treat + (1 + treat | matches)
   Data: matched

REML criterion at convergence: 12634.7

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.5112 -0.7597 -0.2716  0.5129  7.4641 

Random effects:
 Groups   Name        Variance Std.Dev. Corr 
 matches  (Intercept)  3476596 1865          
          treat        7026358 2651     -1.00
 Residual             51450478 7173          
Number of obs: 614, groups:  matches, 102

Fixed effects:
            Estimate Std. Error t value
(Intercept)   5875.6      491.7  11.950
treat          464.6      743.7   0.625

Correlation of Fixed Effects:
      (Intr)
treat -0.691
convergence code: 0
Model failed to converge with max|grad| = 0.00776755 (tol = 0.002, component 1)