## fitting
library(lme4)
library(glmmTMB)
library(lattice)
## diagnostics etc.
library(broom.mixed)
library(DHARMa)
library(performance)
library(sjPlot)
library(dotwhisker)
## plotting
library(ggplot2); theme_set(theme_bw())
library(cowplot)

Examples

Formulae

Formula                          Meaning
y ~ x                            No random effects
y ~ x + (1|g)                    The intercept is a random effect
y ~ x + (1|site/block)           Nested random effects (block within site)
y ~ x + (1|site) + (1|year)      Crossed random effects
y ~ x + (1|site:block)           Interaction (only block within site)
y ~ x + site + (1|site:block)    Fixed site, random block within site
y ~ x + (x|g)                    Intercept and slope are random effects
y ~ (x|g)                        Zero slope on average (weird!)
y ~ x + (1|g) + (0+x|g)          Independent slope and intercept
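
For example, the nested syntax (1|site/block) is just shorthand for a main grouping term plus the site:block interaction. A minimal sketch with simulated data (the data frame dd below is invented purely for illustration):

set.seed(101)
dd <- expand.grid(site = factor(1:6), block = factor(1:4), rep = 1:5)
dd$x <- runif(nrow(dd))
## add site- and block-within-site-level noise so the variance components are nonzero
dd$y <- rnorm(nrow(dd)) + 2*rnorm(6)[dd$site] + rnorm(24)[interaction(dd$site, dd$block)]
m_nested   <- lmer(y ~ x + (1|site/block), data = dd)
m_expanded <- lmer(y ~ x + (1|site) + (1|site:block), data = dd)
logLik(m_nested)    ## identical log-likelihoods: the two formulas specify the same model
logLik(m_expanded)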

Look at the data

q0 <- (ggplot(sleepstudy, aes(Days, Reaction, colour = Subject))
    + geom_point())  ## points only
print(q0+geom_line())

(Alternatives: connect points within individuals rather than colouring them; suppress the legend; separate individuals into separate facets … For scatterplots, try ggalt::geom_encircle(). See examples here as well.)
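
For example, one such variant (a sketch): connect points within subjects, drop the colour mapping, and put each subject in its own facet:

q1 <- (ggplot(sleepstudy, aes(Days, Reaction))
    + geom_point()
    + geom_line(aes(group = Subject))   ## connect within subjects, no colour
    + facet_wrap(~Subject))             ## one panel per subject
## (if colour is mapped, add theme(legend.position = "none") to suppress the legend)
print(q1)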

Basic model fits

library(lme4)
## per-group fit (fixed)
lm1 <- lmList(Reaction~Days|Subject, data=sleepstudy)  ## stratified
lm1B <- lm(Reaction~Days*Subject, data=sleepstudy)     ## fixed effect
## random intercept
lm2 <- lmer(Reaction~Days+(1|Subject), data=sleepstudy)
## random slopes
lm3 <- lmer(Reaction~Days+(1+Days|Subject), data=sleepstudy) 

You should generally use random slopes where possible, i.e. whenever at least some individuals are measured on more than one day (Schielzeth and Forstmeier 2009).

Compute predictions

pp <- expand.grid(Days=0:9, Subject=levels(sleepstudy$Subject))
pp1 <- cbind(pp, Reaction=predict(lm1,newdata=pp))
pp2 <- cbind(pp, Reaction=predict(lm2,newdata=pp))
pp3 <- cbind(pp, Reaction=predict(lm3,newdata=pp))
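
Population-level (fixed-effects-only) predictions can also be obtained by setting re.form = NA, in which case newdata does not need to contain the grouping variable; a sketch:

pp_pop <- data.frame(Days = 0:9)
pp_pop$Reaction <- predict(lm3, newdata = pp_pop, re.form = NA)  ## ignore random effects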

plot predictions

theme_set(theme_classic()+theme(legend.position="none"))
plot_grid(q0+geom_line(data=pp2)+ggtitle("random intercept"),
          q0+geom_line(data=pp3)+ggtitle("random int&slope"),
          q0+geom_line(data=pp1)+ggtitle("fixed effects"),
          nrow=1)

diagnostics

plot(lm3)  ## fitted vs residual
## scale-location
plot(lm3, sqrt(abs(resid(.))) ~ fitted(.),
     type=c("p","smooth"), col.line="red")
## Q-Q plots
qqmath(lm3)
## or:
aa <- augment(lm3)
names(aa)
##  [1] "Reaction" "Days"     "Subject"  ".fitted"  ".resid"   ".hat"    
##  [7] ".cooksd"  ".fixed"   ".mu"      ".offset"  ".sqrtXwt" ".sqrtrwt"
## [13] ".weights" ".wtres"
## and then use ggplot.
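
For example, a ggplot version of the scale-location plot, built from the augmented data (a sketch):

gg_sl <- (ggplot(aa, aes(.fitted, sqrt(abs(.resid))))
    + geom_point()
    + geom_smooth())   ## add a smooth to highlight trends in residual spread
print(gg_sl)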

Or use DHARMa::simulateResiduals(lm3, plot=TRUE), or performance::check_model(lm3)

coefficient plots

Use broom.mixed (in conjunction with dotwhisker::dwplot)

tt <- tidy(lm3, conf.int=TRUE, conf.method="profile")
## Computing profile confidence intervals ...
## Computing profile confidence intervals ...
dwplot(tt)
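
Because the tidied output contains both fixed-effect and random-effect (variance/correlation) parameters, it can be worth subsetting before plotting; a sketch using the effect column that broom.mixed adds for merMod fits:

tt_fixed <- subset(tt, effect == "fixed" & term != "(Intercept)")  ## drop intercept: different scale
dwplot(tt_fixed)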

random effects plots

lattice::dotplot(ranef(lm3))
lattice::qqmath(ranef(lm3))

Or sjPlot::plot_model(lm3, type = "re")

post-hoc stuff

emmeans, effects, car::Anova(), drop1, etc. all work …
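
For example (a sketch; car and emmeans are not loaded in the setup chunk above):

library(car)
library(emmeans)
Anova(lm3)                        ## Wald chi-square test for the fixed effect of Days
emtrends(lm3, ~ 1, var = "Days")  ## estimated marginal slope of Days, with CI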

model choice considerations


The maximal model (all random slopes the design allows; Barr et al. 2013) often won't actually work in practice, e.g. when there are too few groups, or too few observations per group, to estimate all of the variance-covariance parameters (Bates et al. 2015; Matuschek et al. 2017).

Random-slopes models: what does (x|g) really do?

\[ (\textrm{intercept}, \textrm{slope}) \sim \textrm{MVN}\left(\boldsymbol 0, \left[ \begin{array}{cc} \sigma^2_{\textrm{int}} & \sigma_{\textrm{int},\textrm{slope}} \\ \sigma_{\textrm{int},\textrm{slope}} & \sigma^2_{\textrm{slope}} \end{array} \right] \right) \]
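
The estimated version of this variance-covariance matrix for the random-slopes fit can be inspected directly:

VarCorr(lm3)                  ## SDs and intercept-slope correlation
as.data.frame(VarCorr(lm3))   ## same information in long format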

The equatiomatic package:

## remotes::install_github('datalorax/equatiomatic')
if (require("equatiomatic", quietly = TRUE)) {
    extract_eq(lm3)
}

\[ \begin{aligned} \operatorname{Reaction}_{i} &\sim N \left(\alpha_{j[i]} + \beta_{1j[i]}(\operatorname{Days}), \sigma^2 \right) \\ \left( \begin{array}{c} \begin{aligned} &\alpha_{j} \\ &\beta_{1j} \end{aligned} \end{array} \right) &\sim N \left( \left( \begin{array}{c} \begin{aligned} &\mu_{\alpha_{j}} \\ &\mu_{\beta_{1j}} \end{aligned} \end{array} \right) , \left( \begin{array}{cc} \sigma^2_{\alpha_{j}} & \rho_{\alpha_{j}\beta_{1j}} \\ \rho_{\beta_{1j}\alpha_{j}} & \sigma^2_{\beta_{1j}} \end{array} \right) \right) \text{, for Subject j = 1,} \dots \text{,J} \end{aligned} \]

What is a practical model?

How to simplify?
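
One common simplification (a sketch, not a universal recommendation) is to drop the intercept-slope correlation with lme4's double-bar syntax, which here is equivalent to (1|Subject) + (0+Days|Subject):

lm3_nocorr <- lmer(Reaction ~ Days + (Days || Subject), data = sleepstudy)
VarCorr(lm3_nocorr)  ## separate intercept and slope variances, no correlation term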

Convergence failures

m1 <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy)
aa <- allFit(m1)
## bobyqa : [OK]
## Nelder_Mead : [OK]
## nlminbwrap : [OK]
## nmkbw : [OK]
## optimx.L-BFGS-B : [OK]
## nloptwrap.NLOPT_LN_NELDERMEAD : [OK]
## nloptwrap.NLOPT_LN_BOBYQA : [OK]
ss <- summary(aa)
names(ss)
## [1] "which.OK" "msgs"     "fixef"    "llik"     "sdcor"    "theta"    "times"   
## [8] "feval"
ss$fixef
##                               (Intercept)     Days
## bobyqa                           251.4051 10.46729
## Nelder_Mead                      251.4051 10.46729
## nlminbwrap                       251.4051 10.46729
## nmkbw                            251.4051 10.46729
## optimx.L-BFGS-B                  251.4051 10.46729
## nloptwrap.NLOPT_LN_NELDERMEAD    251.4051 10.46729
## nloptwrap.NLOPT_LN_BOBYQA        251.4051 10.46729
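
It is also worth checking that the log-likelihoods and random-effect standard deviations/correlations agree across optimizers:

ss$llik    ## log-likelihoods from each optimizer
ss$sdcor   ## random-effect SDs/correlations from each optimizer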

GLMMs: integration techniques

GLMMs: more details

Other resources

References

Barr, Dale J., Roger Levy, Christoph Scheepers, and Harry J. Tily. 2013. “Random Effects Structure for Confirmatory Hypothesis Testing: Keep It Maximal.” Journal of Memory and Language 68 (3): 255–78. https://doi.org/10.1016/j.jml.2012.11.001.
Bates, Douglas, Reinhold Kliegl, Shravan Vasishth, and Harald Baayen. 2015. “Parsimonious Mixed Models.” arXiv:1506.04967 [Stat], June. http://arxiv.org/abs/1506.04967.
Matuschek, Hannes, Reinhold Kliegl, Shravan Vasishth, Harald Baayen, and Douglas Bates. 2017. “Balancing Type I Error and Power in Linear Mixed Models.” Journal of Memory and Language 94: 305–15. https://doi.org/10.1016/j.jml.2017.01.001.
Schielzeth, Holger, and Wolfgang Forstmeier. 2009. “Conclusions Beyond Support: Overconfident Estimates in Mixed Models.” Behavioral Ecology 20 (2): 416–20. https://doi.org/10.1093/beheco/arn145.