############################## # The value of using multilevel models # Example: Repeated Measures and Clustering in sections # Author: Elli Theobald: ellij@uw.edu # Full citation: Students are rarely independent: When, why, and how to use random effects in discipline-based education research ############################## # load libraries # lme4 contains the function (lmer) to fit multilevel models library(lme4) # set working directory setwd("") # read in the data mydata <- read.csv("RepeatedStudents.csv",head=T) head(mydata) # data description: # 2 sections (A and B), 2 treatments (treatment=1 and control=0) # each section got 3 doses: either 2 treatments and 1 control (B) or 2 controls and 1 treatment (A) table(mydata$Section,mydata$Treatment,mydata$Topic) ############## ### ICC ### ############## # Student Random Effect mod0_Stu <- lmer(PostScore ~ 1 + (1|StudentID), data=mydata) summary(mod0_Stu) # makes random effect portion of model output a dataframe out_mod0_Stu <- as.data.frame(VarCorr(mod0_Stu),head=TRUE) # calculates Intraclass Correlation (ICC) btwn_stu <- out_mod0_Stu$vcov[out_mod0_Stu$grp=='StudentID'] # between cluster variation win_stu <- out_mod0_Stu$vcov[out_mod0_Stu$grp=='Residual'] # within cluster variance (ICC_stu <- btwn_stu/(win_stu + btwn_stu)) # calculates and prints ICC # Section Random Effect mod0_Sect <- lmer(PostScore ~ 1 + (1|Section), data=mydata) summary(mod0_Sect) # makes random effect portion of model output a dataframe out_mod0_Sect <- as.data.frame(VarCorr(mod0_Sect),head=TRUE) # calculates Intraclass Correlation for Section (ICC) btwn_sect <- out_mod0_Sect$vcov[out_mod0_Sect$grp=='Section'] # between cluster variation win_sect <- out_mod0_Sect$vcov[out_mod0_Sect$grp=='Residual'] # within cluster variance (ICC_sect <- btwn_sect/(win_sect + btwn_sect)) # calculates and prints ICC # Interpretation: # Based on ICC, it is most important to test StudentID as a random effect # and less important to test Section as a random effect # Prediction: model selection will favor including StudentID and excluding Section ############################################################ #### Model selection as recommended by Zuur et al. 2009 #### ############################################################ # Determine the best random effects structure ############## ### Step 1 ### ############## # Fit the most complex fixed-effects only model that explicitly tests the hypothesis # Model 1: tests the hypothesis that treatment impacts postscore # note that this is fit in R's base package with the function lm (not lmer in the lme4 package) mod1 <- lm(PostScore ~ PreScore + Treatment + Topic, data=mydata) summary(mod1) AIC(mod1) ############## ### Step 2 ### ############## # Test all combinations of random effect structures # set REML=T (this is the default) # Model 2: student random effect (repeated measures) and section random effect (clustering) mod2 <- lmer(PostScore ~ PreScore + Treatment + Topic + (1|StudentID) + (1|Section), data=mydata, REML=T) summary(mod2) AIC(mod2) # Model 3: student random effect only (repeated measures) mod3 <- lmer(PostScore ~ PreScore + Treatment + Topic + (1|StudentID), data=mydata, REML=T) summary(mod3) AIC(mod3) # Model 4: section random effect only (clustering) mod4 <- lmer(PostScore ~ PreScore + Treatment + Topic + (1|Section), data=mydata, REML=T) summary(mod4) AIC(mod4) ############## ### Step 3 ### ############## # compare all models from steps 1 and 2 using AIC AIC(mod1, mod2, mod3, mod4) # mod3 has the lowest AIC ############################################################### #### Backwards Model selection to Select the Fixed Effects #### ############################################################### # Determine the best fixed effects that explain the PostScore # Start with the best model from above (mod3) # specify REML=F mod3F <- lmer(PostScore ~ PreScore + Treatment + Topic + (1|StudentID), data=mydata, REML=F) summary(mod3F) AIC(mod3F) # Remove fixed effects singularly # If AIC increases, put the fixed effect back in # Remove Treatment because treatment has the smallest effect mod4F <- lmer(PostScore ~ PreScore + Topic + (1|StudentID), data=mydata, REML=F) summary(mod4F) AIC(mod4F) # AIC increases so add Treatment back in # Remove Topic because topic has the next smallest effect mod5F <- lmer(PostScore ~ PreScore + Treatment + (1|StudentID), data=mydata, REML=F) summary(mod5F) AIC(mod5F) # AIC increases so add Topic back in # Remove PreScore because PreScore has the next smallest effect mod6F <- lmer(PostScore ~ Treatment + Topic + (1|StudentID), data=mydata, REML=F) summary(mod6F) AIC(mod6F) # AIC increases so add Topic back in # Refit the final model for interpretation, with REML=T modFinal <- lmer(PostScore ~ PreScore + Treatment + Topic + (1|StudentID), data=mydata, REML=T) summary(modFinal) AIC(modFinal) ###################### ### Interpretation ### ###################### # The best fitting model (with the lowest AIC) includes a student random effect (to account for the repeated measures) # and PreScore, Topic, and Treatment as fixed effects.