
      Lab 2, alternative version using package lme4, lmer function    1/30/16

Refer to the main Lab2 postings for background and data manipulation commentary
The purpose of this posting is to give the small alterations for using
the updated lme4 (versus the legacy nlme) package.
Also at the end I did some addition descriptive plots.

The only difference apparent to the user is the slightly "improved" syntax for lmer
in stating the random (level 1) part of the combined model.
Also there are some additional plots and description, some shown in class, 
that I put in the version here but didn't duplicate in the legacy nlme version.

I did this session before I took to posting the complete Bryk dataset.
If I had that available I could have simply started this by reading in that data set
That is, at the end of lab2 (nlme historical version), a command such as
  > write.table(Bryk, file = "D:\\drr11\\stat209\\labs\\brykdata", quote = FALSE)
would have created the 6 cols that we worked hard to build up in legacy Lab2

Below we need to install 2 packages: lme4 and MEMSS, the later from Bates-Pinhero
book contains the HSB datasets (as a substitute for using these data from the nlme
package). You do not want to have nlme and lme4 loaded
in the same R-session. If you want to juggle these, before bringing in lme4 for ex 
clear out nlme by (this works for me)
>detach("package:nlme")

Notice especially that almost everything is the same in lme4 and it was in nlme
package; only the syntax in the lmer function differs (better).
I do all the steps here just to show you the similarities.
------------------------------------------------------------------


> library(lme4)
Loading required package: Matrix
Loading required package: lattice
Attaching package: 'Matrix'
The following object(s) are masked from 'package:base':
    det
Attaching package: 'lme4'
The following object(s) are masked from 'package:stats':
    AIC

> library(MEMSS)
Attaching package: 'MEMSS'
The following object(s) are masked from 'package:datasets':
    CO2, Orange, Theoph

  Redo all the data manips from Lab2 (see those for commentary)
> data(MathAchieve)
> MathAchieve[1:10,]
   School Minority    Sex    SES MathAch MEANSES
1    1224       No Female -1.528   5.876  -0.428
2    1224       No Female -0.588  19.708  -0.428
3    1224       No   Male -0.528  20.349  -0.428
4    1224       No   Male -0.668   8.781  -0.428
5    1224       No   Male -0.158  17.898  -0.428
6    1224       No   Male  0.022   4.583  -0.428
7    1224       No Female -0.618  -2.832  -0.428
8    1224       No   Male -0.998   0.523  -0.428
9    1224       No Female -0.888   1.527  -0.428
10   1224       No   Male -0.458  21.521  -0.428
> dim(MathAchieve)
[1] 7185    6
> data(MathAchSchool)
> MathAchSchool[1:10,]
     School Size   Sector PRACAD DISCLIM HIMINTY MEANSES
1224   1224  842   Public   0.35   1.597       0  -0.428
1288   1288 1855   Public   0.27   0.174       0   0.128
1296   1296 1719   Public   0.32  -0.137       1  -0.420
1308   1308  716 Catholic   0.96  -0.622       0   0.534
1317   1317  455 Catholic   0.95  -1.694       1   0.351
1358   1358 1430   Public   0.25   1.535       0  -0.014
1374   1374 2400   Public   0.50   2.016       0  -0.007
1433   1433  899 Catholic   0.96  -0.321       0   0.718
1436   1436  185 Catholic   1.00  -1.141       0   0.569
1461   1461 1672   Public   0.78   2.096       0   0.683
> dim(MathAchSchool)
[1] 160   7
>  attach(MathAchieve)
>  mses = tapply(SES, School, mean) # mean of SES for each school
>  detach(MathAchieve)
> dim(mses)
[1] 160
> mses[1:10]
       1224        1288        1296        1308        1317        1358        1374        1433        1436        1461 
-0.43438298  0.12160000 -0.42550000  0.52800000  0.34533333 -0.01966667 -0.01264286  0.71200000  0.56290909  0.67745455 
> Bryk = as.data.frame(MathAchieve[, c("School", "SES", "MathAch" )])
>  names(Bryk) = c("school", "ses", "mathach")
>  Bryk[1:10,]
   school    ses mathach
1    1224 -1.528   5.876
2    1224 -0.588  19.708
3    1224 -0.528  20.349
4    1224 -0.668   8.781
5    1224 -0.158  17.898
6    1224  0.022   4.583
7    1224 -0.618  -2.832
8    1224 -0.998   0.523
9    1224 -0.888   1.527
10   1224 -0.458  21.521
> Bryk$meanses = mses[as.character(Bryk$school)] #cute trick for linking school and meanses
> 
>  Bryk[1:10,]
   school    ses mathach   meanses
1    1224 -1.528   5.876 -0.434383
2    1224 -0.588  19.708 -0.434383
3    1224 -0.528  20.349 -0.434383
4    1224 -0.668   8.781 -0.434383
5    1224 -0.158  17.898 -0.434383
6    1224  0.022   4.583 -0.434383
7    1224 -0.618  -2.832 -0.434383
8    1224 -0.998   0.523 -0.434383
9    1224 -0.888   1.527 -0.434383
10   1224 -0.458  21.521 -0.434383
>  Bryk$cses = Bryk$ses - Bryk$meanses 
> # the centered individual ses, relative standing on the ses measure for a student within school
> # we need this so that intercept in Level I model is mean achievement for school
> 
>  Bryk[1:10,]
   school    ses mathach   meanses        cses
1    1224 -1.528   5.876 -0.434383 -1.09361702
2    1224 -0.588  19.708 -0.434383 -0.15361702
3    1224 -0.528  20.349 -0.434383 -0.09361702
4    1224 -0.668   8.781 -0.434383 -0.23361702
5    1224 -0.158  17.898 -0.434383  0.27638298
6    1224  0.022   4.583 -0.434383  0.45638298
7    1224 -0.618  -2.832 -0.434383 -0.18361702
8    1224 -0.998   0.523 -0.434383 -0.56361702
9    1224 -0.888   1.527 -0.434383 -0.45361702
10   1224 -0.458  21.521 -0.434383 -0.02361702
>  sector = MathAchSchool$Sector
>  names(sector) = row.names(MathAchSchool)
>  Bryk$sector = sector[as.character(Bryk$school)]
>  Bryk[1:10,]
   school    ses mathach   meanses        cses sector
1    1224 -1.528   5.876 -0.434383 -1.09361702 Public
2    1224 -0.588  19.708 -0.434383 -0.15361702 Public
3    1224 -0.528  20.349 -0.434383 -0.09361702 Public
4    1224 -0.668   8.781 -0.434383 -0.23361702 Public
5    1224 -0.158  17.898 -0.434383  0.27638298 Public
6    1224  0.022   4.583 -0.434383  0.45638298 Public
7    1224 -0.618  -2.832 -0.434383 -0.18361702 Public
8    1224 -0.998   0.523 -0.434383 -0.56361702 Public
9    1224 -0.888   1.527 -0.434383 -0.45361702 Public
10   1224 -0.458  21.521 -0.434383 -0.02361702 Public

> dim(Bryk)
[1] 7185    6
> remove(sector) #clean up variable (use it within Bryk dataset)

########################################################################
# END Data Manipulation section
########################################################################

>  attach(Bryk) #we can refer to variables by simple name
>  table(sector) #get the student breakdown 
sector
  Public Catholic 
    3642     3543 
>  cathRegC = lmList(mathach ~ cses| school, subset = sector == "Catholic", data = Bryk)
>  pubRegC = lmList(mathach ~ cses| school, subset = sector == "Public", data = Bryk)
> length(cathRegC); length(pubRegC)
[1] 70
[1] 90
> pubcoef= coef(pubRegC)
> cathcoef= coef(cathRegC)
>  par( mfrow = c(1,2)) # opens a graphics window, creates the figure shown in lecture
>  boxplot(cathcoef[,1], pubcoef[,1], main = 'Intercepts', names = c('Catholic', 'Public'))
>  boxplot(cathcoef[,2], pubcoef[,2], main = 'Slopes', names = c('Catholic', 'Public'))
> 
> # order the sector factor
>  Bryk$sector = factor(Bryk$sector, levels = c('Public', 'Catholic'))


####here is the main event, fitting the random effects model using lmer, 
           note change in syntax, same output values

> bryklmer = lmer(mathach ~ meanses*cses + sector*cses + (1 +  cses|school), data = Bryk)
> summary(bryklmer)
Linear mixed model fit by REML 
Formula: mathach ~ meanses * cses + sector * cses + (1 + cses | school) 
   Data: Bryk 
   AIC   BIC logLik deviance REMLdev
 46524 46592 -23252    46496   46504
Random effects:
 Groups   Name        Variance Std.Dev. Corr  
 school   (Intercept)  2.37958 1.54259        
          cses         0.10122 0.31814  0.391 
 Residual             36.72116 6.05980        
Number of obs: 7185, groups: school, 160

Fixed effects:
                    Estimate Std. Error t value
(Intercept)          12.1279     0.1993   60.86
meanses               5.3329     0.3692   14.45
cses                  2.9450     0.1556   18.93
sectorCatholic        1.2266     0.3063    4.00
meanses:cses          1.0392     0.2989    3.48
cses:sectorCatholic  -1.6427     0.2398   -6.85

Correlation of Fixed Effects:
            (Intr) meanss cses   sctrCt mnss:c
meanses      0.256                            
cses         0.075  0.019                     
sectorCthlc -0.699 -0.356 -0.052              
meanses:css  0.019  0.074  0.293 -0.026       
css:sctrCth -0.052 -0.027 -0.696  0.077 -0.351
> 
# no p-values in base lme4 functions
#  Why lmer (lme4) does not provide p-values for fixed effects 
    Doug Bates: lmer, p-values and all that 
   http://r.789695.n4.nabble.com/lmer-p-values-and-all-that-td800469.html 
## see p-values section of the lme4 manual for more discussion
## package afex is a good choice for adding on p-values if you insist.

########################################
# if you read in brykprepared (rather than prepare the data yourself)
# if you do not reorder the sector factor (as done above)
# you will be doing Pub - Cath instead of Cath - Pub (lmer summary shows you that)
> library(lme4)
Loading required package: Matrix
Loading required package: Rcpp
> Bryk = read.table("http://statweb.stanford.edu/~rag/stat209/brykdataPrepared", header = T)
> head(Bryk)
  school    ses mathach   meanses        cses sector
1   1224 -1.528   5.876 -0.434383 -1.09361702 Public
2   1224 -0.588  19.708 -0.434383 -0.15361702 Public
3   1224 -0.528  20.349 -0.434383 -0.09361702 Public
4   1224 -0.668   8.781 -0.434383 -0.23361702 Public
5   1224 -0.158  17.898 -0.434383  0.27638298 Public
6   1224  0.022   4.583 -0.434383  0.45638298 Public
> bryklmer = lmer(mathach ~ meanses*cses + sector*cses + (1 +  cses|school), data = Bryk)
> summary(bryklmer)
Linear mixed model fit by REML ['lmerMod']
Formula: mathach ~ meanses * cses + sector * cses + (1 + cses | school)
   Data: Bryk

REML criterion at convergence: 46503.7

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-3.15926 -0.72319  0.01704  0.75445  2.95822 

Random effects:
 Groups   Name        Variance Std.Dev. Corr
 school   (Intercept)  2.3796  1.5426       
          cses         0.1012  0.3181   0.39
 Residual             36.7212  6.0598       
Number of obs: 7185, groups:  school, 160

Fixed effects:
                  Estimate Std. Error t value
(Intercept)        13.3545     0.2195   60.85
meanses             5.3329     0.3692   14.45
cses                1.3024     0.1725    7.55
sectorPublic       -1.2266     0.3063   -4.00
meanses:cses        1.0392     0.2989    3.48
cses:sectorPublic   1.6427     0.2398    6.85

Correlation of Fixed Effects:
            (Intr) meanss cses   sctrPb mnss:c
meanses     -0.265                            
cses         0.079 -0.021                     
sectorPublc -0.761  0.356 -0.060              
meanses:css -0.020  0.074 -0.224  0.026       
css:sctrPbl -0.060  0.027 -0.762  0.077  0.351

##do the reordering
> Bryk$sector = factor(Bryk$sector, levels = c('Public', 'Catholic'))
## repeat the lmer (now matches  the data from the labs)
> bryklmer = lmer(mathach ~ meanses*cses + sector*cses + (1 +  cses|school), data = Bryk)
> summary(bryklmer)
Linear mixed model fit by REML ['lmerMod']
Formula: mathach ~ meanses * cses + sector * cses + (1 + cses | school)
   Data: Bryk

REML criterion at convergence: 46503.7

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-3.15926 -0.72319  0.01704  0.75445  2.95822 

Random effects:
 Groups   Name        Variance Std.Dev. Corr
 school   (Intercept)  2.3796  1.5426       
          cses         0.1012  0.3181   0.39
 Residual             36.7212  6.0598       
Number of obs: 7185, groups:  school, 160

Fixed effects:
                    Estimate Std. Error t value
(Intercept)          12.1279     0.1993   60.86
meanses               5.3329     0.3692   14.45
cses                  2.9450     0.1556   18.93
sectorCatholic        1.2266     0.3063    4.00
meanses:cses          1.0392     0.2989    3.48
cses:sectorCatholic  -1.6427     0.2398   -6.85

Correlation of Fixed Effects:
            (Intr) meanss cses   sctrCt mnss:c
meanses      0.256                            
cses         0.075  0.019                     
sectorCthlc -0.699 -0.356 -0.053              
meanses:css  0.019  0.074  0.293 -0.026       
css:sctrCth -0.052 -0.027 -0.696  0.077 -0.351

############## Doing inference  ###################
as you learned in Stat60 confidence intervals best

> confint(bryklmer)
Computing profile confidence intervals ...
                        2.5 %     97.5 %
.sig01               1.305038  1.7726803
.sig02              -1.000000  1.0000000
.sig03               0.000000  0.7386706
.sigma               5.959935  6.1624597
(Intercept)         11.738752 12.5170915
meanses              4.611987  6.0542063
cses                 2.640883  3.2490529
sectorCatholic       0.628395  1.8246971
meanses:cses         0.453632  1.6236103
cses:sectorCatholic -2.110957 -1.1738837
There were 50 or more warnings (use warnings() to see the first 50)

## bootstrap seems to be less unhappy, similar results
> confint(bryklmer,  method = "boot", nsim = 1000, boot.type = "perc")
Computing bootstrap confidence intervals ...
                                  2.5 %     97.5 %
sd_(Intercept)|school        1.30923925  1.7649178
cor_cses.(Intercept)|school -1.00000000  1.0000000
sd_cses|school               0.01898351  0.7263515
sigma                        5.95745865  6.1629184
(Intercept)                 11.74444056 12.5193563
meanses                      4.60475083  6.0838817
cses                         2.62694270  3.2347357
sectorCatholic               0.66206585  1.8424741
meanses:cses                 0.41124738  1.6173318
cses:sectorCatholic         -2.11546516 -1.1473849
> 

   
####################################################################################################################################################
===================================================================
Lab2 update: some useful advanced plots using xyplot from lattice graphics

#read in prepared Bryk data
> brykD = read.table("http://statweb.stanford.edu/~rag/stat209/brykdataPrepared", header = T)
## note it is safer to use statweb.stanford.edu in read.table even if the link URL is www-stat.stanford.edu
> attach(brykD)
> table(school)
school
1224 1288 1296 1308 1317 1358 1374 1433 1436 1461 1462 1477 1499 1637 1906 1909 1942 1946 2030 2208 2277 2305 2336 2458 2467 2526 2626 2629 2639 2651 2655 2658 
  47   25   48   20   48   30   28   35   44   33   57   62   53   27   53   28   29   39   47   60   61   67   47   57   52   57   38   57   42   38   52   45 
2755 2768 2771 2818 2917 2990 2995 3013 3020 3039 3088 3152 3332 3351 3377 3427 3498 3499 3533 3610 3657 3688 3705 3716 3838 3881 3967 3992 3999 4042 4173 4223 
  47   25   55   42   43   48   46   53   59   21   39   52   38   39   45   49   53   38   48   64   51   43   45   41   54   41   52   53   46   64   44   45 
4253 4292 4325 4350 4383 4410 4420 4458 4511 4523 4530 4642 4868 4931 5192 5404 5619 5640 5650 5667 5720 5761 5762 5783 5815 5819 5838 5937 6074 6089 6144 6170 
  58   65   53   33   25   41   32   48   58   47   63   61   34   58   28   57   66   57   45   61   53   52   37   29   25   50   31   29   56   33   43   21 
6291 6366 6397 6415 6443 6464 6469 6484 6578 6600 6808 6816 6897 6990 7011 7101 7172 7232 7276 7332 7341 7342 7345 7364 7635 7688 7697 7734 7890 7919 8009 8150 
  35   58   60   54   30   29   57   35   56   56   44   55   49   53   33   28   44   52   53   48   51   58   56   44   51   54   32   22   51   37   47   44 
8165 8175 8188 8193 8202 8357 8367 8477 8531 8627 8628 8707 8775 8800 8854 8857 8874 8946 8983 9021 9104 9158 9198 9225 9292 9340 9347 9359 9397 9508 9550 9586 
  49   33   30   43   35   27   14   37   41   53   61   48   48   32   32   64   36   58   51   56   55   53   31   36   19   29   57   53   47   35   29   59

> table(school[sector == "Public"])

1224 1288 1296 1358 1374 1461 1499 1637 1909 1942 1946 2030 2336 2467 2626 2639 2651 2655 2768 2771 2818 2917 2995 3013 3088 3152 3332 3351 3377 3657 3716 3881 
  47   25   48   30   28   33   53   27   28   29   39   47   47   52   38   42   38   52   25   55   42   43   46   53   39   52   38   39   45   51   41   41 
3967 3999 4325 4350 4383 4410 4420 4458 4642 5640 5762 5783 5815 5819 5838 5937 6089 6144 6170 6291 6397 6415 6443 6464 6484 6600 6808 6897 6990 7101 7232 7276 
  52   46   53   33   25   41   32   48   61   57   37   29   25   50   31   29   33   43   21   35   60   54   30   29   35   56   44   49   53   28   52   53 
7341 7345 7697 7734 7890 7919 8175 8188 8202 8357 8367 8477 8531 8627 8707 8775 8854 8874 8946 8983 9158 9225 9292 9340 9397 9550 
  51   56   32   22   51   37   33   30   35   27   14   37   41   53   48   48   32   36   58   51   53   36   19   29   47   29 
> table(school[sector == "Catholic"])

1308 1317 1433 1436 1462 1477 1906 2208 2277 2305 2458 2526 2629 2658 2755 2990 3020 3039 3427 3498 3499 3533 3610 3688 3705 3838 3992 4042 4173 4223 4253 4292 
  20   48   35   44   57   62   53   60   61   67   57   57   57   45   47   48   59   21   49   53   38   48   64   43   45   54   53   64   44   45   58   65 
4511 4523 4530 4868 4931 5192 5404 5619 5650 5667 5720 5761 6074 6366 6469 6578 6816 7011 7172 7332 7342 7364 7635 7688 8009 8150 8165 8193 8628 8800 8857 9021 
  58   47   63   34   58   28   57   66   45   61   53   52   56   58   57   56   55   33   44   48   58   44   51   54   47   44   49   43   61   32   64   56 
9104 9198 9347 9359 9508 9586 
  55   31   57   53   35   59 
> library(lattice)
> library(lme4)

#A. Ignore schools, and plot all Cath and Public student with an "overall" regression smoother
 xyplot(mathach ~ cses | sector, brykD, type = c("g", "p", "smooth"), aspect = .5)
# another version all in one frame
xyplot(mathach ~ cses, groups = sector, brykD, type = c("g", "p", "smooth"), aspect = .5)
# now overlay straight-line regression (ignoring schools) instead of smoother
xyplot(mathach ~ cses, groups = sector, brykD, type = c("g", "p", "r"), aspect = .5, col = c("black", "red"))
# you can see, even ignoring schools, Pub (red) is steeper and lower in mean ach than Cath

# B. Do school by school plots
xyplot(mathach ~ cses | school, brykD, type = c("g","p","r"),  subset = sector == "Catholic", xlab = "C_centered ses", ylab = "C_Math", aspect = .5)
# this gives frame-by-frame for 70 Cath schools with straight-line fit
 xyplot(mathach ~ cses | school, brykD, type = c("g","p","r"),  subset = sector == "Public", xlab = "centered ses", ylab = "Math", aspect = .5)
# same for 90 public schools
# These plots look fine if you can stretch them out across a couple big monitors.
# As an alternative try doing these for the subset of the first 26 schools
> xyplot(mathach ~ cses | school, brykD[1:1154,], type = c("g","p","r"),  subset = sector == "Public", xlab = "centered ses", ylab = "Math", aspect = .5)
> xyplot(mathach ~ cses | school, brykD[1:1154,], type = c("g","p","r"),  subset = sector == "Catholic", xlab = "centered ses", ylab = "Math", aspect = .5)

# C. Collection of school fits
# plots of school fits for each sector
xyplot(mathach ~ cses, groups = school, brykD, type = c("r"),  subset = sector == "Catholic", xlab = "C_centered ses", ylab = "C_Math", aspect = .5, col = c("black"))
xyplot(mathach ~ cses, groups = school, brykD, type = c("r"),  subset = sector == "Public", xlab = "P_centered ses", ylab = "P_Math", aspect = .5, col = c("black"))
# or do 2 frames on a page, with or without data points
xyplot(mathach ~ cses | sector, groups = school, data = brykD, type = c("g","p","r"), index = function(x,y) coef(lm(y ~ x))[1], xlab = "centered ses", ylab = "Math", aspect = .5, col = c("black"))
xyplot(mathach ~ cses | sector, groups = school, data = brykD, type = c("r"), index = function(x,y) coef(lm(y ~ x))[1], xlab = "centered ses", ylab = "Math", aspect = .5, col = c("black"))

# D. Confint plots for lmList fits
>   cathRegC = lmList(mathach ~ cses| school, subset = sector == "Catholic", data = brykD)
>   pubRegC = lmList(mathach ~ cses| school, subset = sector == "Public", data = brykD)
>  length(cathRegC); length(pubRegC)
[1] 70
[1] 90
> pubcoef= coef(pubRegC)
>  cathcoef= coef(cathRegC)
# plots done in Lab text with nlme often shown in expositions, for each sector
> plot(confint(cathRegC, pool = TRUE), order = 1)
> 
> plot(confint(pubRegC, pool = TRUE), order = 1)
---------------------------
#### some of the plots generated above are posted here
http://web.stanford.edu/~rag/ed401d/brykplots.pdf
-----------------------
END extra plots section

































