11 March 2024
glm(), model specification as before: glm(y~f1+x1+f2+x2, data=..., family=..., ...)
family=binomial: proportions, out of a total number of counts; includes binary (Bernoulli) (“logistic regression”)
family=poisson: Poisson (independent counts, no set maximum, or far from the maximum)
"gaussian"
), Gamma)glm
is Gaussianfamily=Gamma(link="log")
)Model setup is the same as linear models
but the linear relationship is set up on the link scale
log
/exp
): count dataqlogis
/plogis
)plot
is still somewhat usefulcor(observed,predict(model, type="response"))
)(obs-exp)/sqrt(V(exp))
]type="response"
to back-transformperformance::check_model()
, DHARMa
package are OK (simulateResiduals(model,plot=TRUE)
)family=quasipoisson
); fit, then adjust CIs/p-valuesMASS::glm.nb
)glmmTMB
package)
plogis(0) = 0.5
plogis(0+1) = 0.73
summary()
), confidence intervals
drop1(model,test="Chisq")
, anova(model1,model2)
), profile confidence intervals (MASS::confint.glm
)lm
weights
argument)
cbind(successes,failures) [not cbind(successes,total)]
weights=N
glm(p~...,data,weights=rep(N,nrow(data)))
)... + offset(log(A))
in R formulalink="cloglog"
(see here)ggplot2
geom_smooth(method="glm", method.args=list(family=...))
dotwhisker
, emmeans
, effects
, sjPlot
stat_sum
, position="jitter"
, geom_dotplot
, (beeswarm plot)
glm() problems
family (\(\to\) linear model); using glm() for linear models (unnecessary)
data here
aids <- read.csv("../data/aids.csv")
aids <- transform(aids, date=year+(quarter-1)/4)
gg0 <- ggplot(aids,aes(date,cases))+geom_point()
gg1 <- gg0 + geom_smooth(method="glm",colour="red", formula=y~x, method.args=list(family="quasipoisson"))
g1 <- glm(cases~date, data = aids, family=quasipoisson(link="log"))
summary(g1)
plot(g1)
)acf(residuals(g1)) ## check autocorrelation
library(DHARMa)
g0 <- update(g1, family=poisson)
plot(simulateResiduals(g0))
print(gg2 <- gg1+geom_smooth(method="glm",formula=y~poly(x,2), method.args=list(family="quasipoisson")))
g2 <- update(g1,.~poly(date,2))
acf(residuals(g2)) ## check autocorrelation
summary(g2)
##
## Call:
## glm(formula = cases ~ poly(date, 2), family = quasipoisson(link = "log"),
##     data = aids)
##
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)     3.86859    0.05004  77.311  < 2e-16 ***
## poly(date, 2)1  3.82934    0.25162  15.219 2.46e-11 ***
## poly(date, 2)2 -0.68335    0.19716  -3.466  0.00295 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 1.657309)
##
##     Null deviance: 677.264  on 19  degrees of freedom
## Residual deviance:  31.992  on 17  degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
anova(g1,g2,test="F") ## for quasi-models specifically
## Analysis of Deviance Table
##
## Model 1: cases ~ date
## Model 2: cases ~ poly(date, 2)
##   Resid. Df Resid. Dev Df Deviance      F   Pr(>F)
## 1        18     53.020
## 2        17     31.992  1   21.028 12.688 0.002399 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Dobson, Annette J., and Adrian Barnett. 2008. An Introduction to Generalized Linear Models. 3rd ed. Chapman & Hall/CRC.