> ################# WEEK 3 Addendum, Lindner data IPTW analyses > ## now try IPTW (see eval reference), 'alternative to full matching" > ## we have propensity scores as 'distance' in m2full.dat > detach(m2full.dat) > # weights for estimating ATE (according to lore) > # for abcix = 1, 1/(m2full.dat$distance) > # for abcix = 0, 1/(1 - m2full.dat$distance) > m2full.dat$weight.ATE = ifelse(m2full.dat$abcix == 1, 1/m2full.dat$distance, 1/(1 - m2full.dat$distance)) > head(m2full.dat) lifepres cardbill abcix stent height female diabetic acutemi ejecfrac ves1proc distance weights subclass weight.ATE 1 0.0 14301 1 0 163 1 1 0 56 1 0.4079170 1 1 2.451479 2 11.6 3563 1 0 168 0 0 0 56 1 0.5784602 1 6 1.728727 3 11.6 4694 1 0 188 0 0 0 50 1 0.5244469 1 85 1.906771 4 11.6 7366 1 0 175 0 1 0 50 1 0.4727311 1 140 2.115367 5 11.6 8247 1 0 168 1 0 0 55 1 0.4930466 1 176 2.028206 6 11.6 8319 1 0 178 0 0 0 50 1 0.5625524 1 209 1.777612 > tail(m2full.dat) # yes there are non-drug subjects with high propensity (see boxplot) lifepres cardbill abcix stent height female diabetic acutemi ejecfrac ves1proc distance weights subclass weight.ATE 991 11.6 15176 0 1 180 0 0 0 55 3 0.9038603 3.842407 185 10.401532 992 11.6 15736 0 0 170 0 0 1 51 1 0.8262407 3.842407 149 5.755089 993 11.6 19547 0 1 170 0 0 1 55 2 0.9444637 5.123209 189 18.006230 994 11.6 36834 0 0 183 0 0 0 35 3 0.8718633 4.696275 192 7.804163 995 11.6 46124 0 0 175 0 0 0 45 4 0.9342004 1.280802 212 15.197664 996 11.6 47732 0 1 185 0 0 0 40 2 0.8355348 2.561605 203 6.080313 # outcome IPTW analaysis > lm.IPTW = lm(log(cardbill) ~ abcix, data = m2full.dat, weights = (weight.ATE)) > summary(lm.IPTW) Call: lm(formula = log(cardbill) ~ abcix, data = m2full.dat, weights = (weight.ATE)) Weighted Residuals: Min 1Q Median 3Q Max -3.2034 -0.4174 -0.1548 0.2535 6.2000 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 9.47026 0.02245 421.787 < 2e-16 *** abcix 0.10322 0.03184 3.242 0.00123 ** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 0.7122 on 994 degrees of freedom Multiple R-squared: 0.01046, Adjusted R-squared: 0.009466 F-statistic: 10.51 on 1 and 994 DF, p-value: 0.001228 > confint(lm.IPTW) 2.5 % 97.5 % (Intercept) 9.42620153 9.5143218 abcix 0.04073608 0.1657008 > # a little smaller effect on cardbill than say lmer, full matching > lm2.IPTW = lm(log(cardbill) ~ abcix + acutemi + ejecfrac, data = m2full.dat, weights = (weight.ATE)) > summary(lm2.IPTW) Call: lm(formula = log(cardbill) ~ abcix + acutemi + ejecfrac, data = m2full.dat, weights = (weight.ATE)) Weighted Residuals: Min 1Q Median 3Q Max -3.1474 -0.4037 -0.1484 0.2401 6.1726 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 9.979448 0.081983 121.726 < 2e-16 *** abcix 0.103107 0.031224 3.302 0.000994 *** acutemi -0.061709 0.044696 -1.381 0.167703 ejecfrac -0.009819 0.001533 -6.406 2.3e-10 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 0.6984 on 992 degrees of freedom Multiple R-squared: 0.05032, Adjusted R-squared: 0.04745 F-statistic: 17.52 on 3 and 992 DF, p-value: 4.34e-11 > # adding some of the covariates leaves unchanged > # exercise try out this analysis with lifepres outcome > lm2.IPTW = lm(log(cardbill) ~ abcix + acutemi + ejecfrac + ves1proc, data = m2full.dat, weights = (weight.ATE)) > summary(lm2.IPTW) Call: lm(formula = log(cardbill) ~ abcix + acutemi + ejecfrac + ves1proc, data = m2full.dat, weights = (weight.ATE)) Weighted Residuals: Min 1Q Median 3Q Max -3.0136 -0.3896 -0.1455 0.2404 4.8563 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 9.647987 0.087654 110.069 < 2e-16 *** abcix 0.111768 0.030117 3.711 0.000218 *** acutemi -0.099223 0.043301 -2.291 0.022146 * ejecfrac -0.008693 0.001483 -5.861 6.27e-09 *** ves1proc 0.195657 0.022378 8.743 < 2e-16 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 0.6733 on 991 degrees of freedom Multiple R-squared: 0.1183, Adjusted R-squared: 0.1148 F-statistic: 33.25 on 4 and 991 DF, p-value: < 2.2e-16 > # nasty ves1proc is spelled with a numeral 1 > # to come, propensity estimated by boosted regression (as in twang) # and partition trees (rpart, cf PSAgraphics)