## Week 9 RQ1 Sharp Design

## modify rdd manual example
>  x = runif(1000,-1,1)
>  y = 3+2*x+3*(x>=0)+rnorm(1000) 
> # example builds in a treatment effect of 3 points for those selected on x (x>0)
> # story? students with high (or higher) ability selected for enriched instruction
> 
> summary(RDestimate(y~x))

Call:
RDestimate(formula = y ~ x)

Type:
sharp 

Estimates:
           Bandwidth  Observations  Estimate  Std. Error  z value  Pr(>|z|)       
LATE       0.6032      604          2.991     0.1738      17.20     2.442e-66  ***
Half-BW    0.3016      303          2.929     0.2483      11.80     4.054e-32  ***
Double-BW  1.2065     1000          3.026     0.1305      23.18    6.543e-119  ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

F-statistics:
           F       Num. DoF  Denom. DoF  p
LATE        758.3  3         600         0
Half-BW     304.3  3         299         0
Double-BW  1808.0  3         996         0

> plot(RDestimate(y~x))

> #compare with our simple ancova approach
>  rubin = lm(y ~ (x>0) + x)
>  summary(rubin)

Call:
lm(formula = y ~ (x > 0) + x)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.0350 -0.7003  0.0177  0.6848  3.5878 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   2.9285     0.0726   40.34   <2e-16 ***
x > 0TRUE     3.0356     0.1289   23.55   <2e-16 ***
x             2.0210     0.1114   18.14   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.014 on 997 degrees of freedom
Multiple R-squared:  0.868,     Adjusted R-squared:  0.8678 
F-statistic:  3279 on 2 and 997 DF,  p-value: < 2.2e-16

> # double BW uses all the obs, like ancova almost identical results


## lets take a look at the 303 observations included in the Half-BW grouping
> rq1 = as.data.frame(cbind(x,y))
> head(rq1)
           x        y
1  0.5371494 7.640610
2  0.7470347 6.594326
3  0.0105800 5.825933
4  0.3941627 6.109780
5 -0.7297877 2.169437
6 -0.1283131 1.511972
> summary(rq1)
       x                   y         
 Min.   :-0.999332   Min.   :-1.314  
 1st Qu.:-0.509861   1st Qu.: 1.907  
 Median : 0.014509   Median : 4.702  
 Mean   : 0.002246   Mean   : 4.472  
 3rd Qu.: 0.493405   3rd Qu.: 6.990  
 Max.   : 0.993605   Max.   :11.453  

# go out half-BW about center of 0, get 303 observations "near" the cut score
> rq1s = subset(rq1, subset = x > -.3016 & x < .30161)  
> head(rq1s)
            x        y
3   0.0105800 5.825933
6  -0.1283131 1.511972
15 -0.1222274 0.904357
16 -0.1427003 1.645295
18  0.1000019 7.029370
19 -0.2639345 2.014617
> dim(rq1s)
[1] 303   2

# compare groups near the cut-score
> ttest = lm(y ~ x >0, rq1s)
> summary(ttest)

Call:
lm(formula = y ~ x > 0, data = rq1s)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.4699 -0.7441 -0.0052  0.6762  3.3492 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  2.61068    0.08449   30.90   <2e-16 ***
x > 0TRUE    3.68678    0.11700   31.51   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.017 on 301 degrees of freedom
Multiple R-squared:  0.7674,    Adjusted R-squared:  0.7666 
F-statistic: 992.9 on 1 and 301 DF,  p-value: < 2.2e-16

> t.test(y ~ x >0, data = rq1s)

        Welch Two Sample t-test

data:  y by x > 0
t = -31.498, df = 298.28, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -3.917124 -3.456431
sample estimates:
mean in group FALSE  mean in group TRUE 
           2.610676            6.297453 

> wilcox.test(y ~ x >0, data = rq1s, conf.int = TRUE)

        Wilcoxon rank sum test with continuity correction

data:  y by x > 0
W = 65, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
95 percent confidence interval:
 -3.909022 -3.430175
sample estimates:
difference in location 
             -3.673628 

>  # the central 303 give a larger estimate than correct, need to go smaller slice, maybe +/- .1 on x

# now try rdrobust package
######################################################################################
> install.packages("rdrobust")
> library(rdrobust)

> rdr = rdrobust(rq1$y, rq1$x) # see manual for syntax
> summary(rdr)
$call
rdrobust(y = rq1$y, x = rq1$x)

$coefficients
                  Coeff Std. Err.         z        P>|z| CI Lower CI Upper
Conventional   2.943474 0.2484884 11.845517 2.270145e-32 2.456445 3.430502
Bias-Corrected 2.953906 0.2484884 11.887500 1.374596e-32 2.466878 3.440935
Robust         2.953906 0.2966878  9.956278 2.367609e-23 2.372409 3.535404

attr(,"class")
[1] "summary.rdrobust"
## results most similar to half-BW estimate from rdd function


> rdplot(rq1$y, rq1$x) # annoying you don't plot the object
> # similar plot but a little nicer than rdd version

END RQ1