library(ggplot2)
library(brms)
df = read.csv('files_practice/loud_trials.csv', stringsAsFactors = TRUE)
summary(df)
## file subject cond
## a_ama_1_baseSelf_1_tc.wav : 2 amc : 42 aggressive:266
## a_ama_1_baseWhere_1_tc.wav : 2 mtm : 42 base : 89
## a_ama_2_submissive_1_tc.wav : 2 clh : 38 fight :116
## a_ama_2_submissive_2_tc.wav : 2 did : 38 loud : 60
## a_ama_2_submissive_3_tc.wav : 2 mta : 38 submissive:271
## a_ama_3_aggressive_1_t40.wav: 2 pam : 38
## (Other) :790 (Other):566
## pitch dB sex
## Min. : 72.0 Min. : 40.14 F:552
## 1st Qu.: 205.0 1st Qu.: 62.36 M:250
## Median : 267.0 Median : 72.37
## Mean : 292.7 Mean : 74.65
## 3rd Qu.: 350.0 3rd Qu.: 85.12
## Max. :1308.0 Max. :115.85
## NA's :20
Note the NAs - some missing values will be dropped when modeling.
A little trick: let’s set the reference level of “cond” to “base” to make it easier to interpret the regression coefficients. We could also drop “loud” and “fight” for the first analysis.
df$cond = factor(df$cond, levels = c('base', 'submissive', 'aggressive', 'loud', 'fight'))
table(df$cond) # now "base" is the first level
##
## base submissive aggressive loud fight
## 89 271 266 60 116
A quick preview:
boxplot(dB ~ cond, df)
boxplot(dB ~ sex, df)
boxplot(dB ~ cond + sex, df)
Model structure: we have repeated observations from the same subject,
so let’s at least fit random intercepts (1|subject), and we could also
let the effect of condition vary per subject (random slopes). We can
also control for sex, just to be on the safe side (although there are no
major sex differences in loudness). I included some reasonable priors,
but there is enough data here that we don’t have to worry about priors
too much - the models will converge just fine with default flat priors.
Note that file
is not a clustering level because each
recording occurs only once.
# check what priors are available for this type of model
# get_prior(dB ~ cond + (cond|subject), data = df, family = 'gaussian')
prior = c(
set_prior('normal(80, 30)', class = 'Intercept'),
# 80 dB ~ loudness of voice in conversation
set_prior('normal(0, 20)', class = 'b')
# expect no more than ±20-40 dB across conditions
)
mod_dB = brm(
dB ~ cond + sex + (cond|subject),
data = df, family = 'gaussian',
warmup = 500, iter = 2000, chains = 3, cores = 3,
# sample_prior = 'only', # good for checking what your prior predicts before seeing any data
prior = prior,
file = 'files_practice/dB~cond'
)
Check convergence:
plot(mod_dB) # converged well
summary(mod_dB) # converged well
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: dB ~ cond + sex + (cond | subject)
## Data: df (Number of observations: 802)
## Draws: 3 chains, each with iter = 2000; warmup = 500; thin = 1;
## total post-warmup draws = 4500
##
## Group-Level Effects:
## ~subject (Number of levels: 36)
## Estimate Est.Error l-95% CI u-95% CI Rhat
## sd(Intercept) 4.39 0.87 2.96 6.31 1.00
## sd(condsubmissive) 4.54 0.96 2.91 6.64 1.00
## sd(condaggressive) 7.59 1.24 5.49 10.34 1.00
## sd(condloud) 8.26 1.39 5.93 11.38 1.00
## sd(condfight) 8.78 1.43 6.42 11.91 1.00
## cor(Intercept,condsubmissive) -0.06 0.24 -0.50 0.42 1.00
## cor(Intercept,condaggressive) -0.25 0.20 -0.60 0.18 1.00
## cor(condsubmissive,condaggressive) -0.12 0.22 -0.55 0.31 1.00
## cor(Intercept,condloud) -0.08 0.22 -0.49 0.35 1.00
## cor(condsubmissive,condloud) -0.14 0.23 -0.58 0.32 1.00
## cor(condaggressive,condloud) 0.50 0.17 0.13 0.77 1.00
## cor(Intercept,condfight) 0.00 0.22 -0.41 0.43 1.00
## cor(condsubmissive,condfight) -0.39 0.21 -0.74 0.07 1.00
## cor(condaggressive,condfight) 0.34 0.19 -0.06 0.65 1.00
## cor(condloud,condfight) 0.65 0.15 0.29 0.87 1.00
## Bulk_ESS Tail_ESS
## sd(Intercept) 2676 3193
## sd(condsubmissive) 2273 3078
## sd(condaggressive) 1950 2840
## sd(condloud) 1878 2632
## sd(condfight) 2872 3172
## cor(Intercept,condsubmissive) 2050 2539
## cor(Intercept,condaggressive) 1397 2138
## cor(condsubmissive,condaggressive) 1369 2027
## cor(Intercept,condloud) 1665 2439
## cor(condsubmissive,condloud) 1414 2326
## cor(condaggressive,condloud) 2086 2961
## cor(Intercept,condfight) 1984 2790
## cor(condsubmissive,condfight) 1755 2596
## cor(condaggressive,condfight) 3016 3502
## cor(condloud,condfight) 2612 3662
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 63.73 1.18 61.48 66.09 1.00 1616 2641
## condsubmissive -3.11 1.01 -5.12 -1.13 1.00 2670 3130
## condaggressive 14.09 1.61 10.80 17.21 1.00 1738 2381
## condloud 28.13 1.63 24.81 31.27 1.00 2408 3265
## condfight 28.01 1.78 24.50 31.50 1.00 2994 2811
## sexM 3.93 1.79 0.49 7.57 1.00 2068 2950
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 3.88 0.11 3.68 4.10 1.00 5471 3205
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(mod_dB) # looks fine
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
Plot with observed data to make sure the model makes sense:
plot(conditional_effects(mod_dB), points = TRUE)
Difference between conditions
summary(mod_dB)$fixed
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 63.732439 1.176199 61.4773963 66.094538 1.0006795 1616.240
## condsubmissive -3.109365 1.008379 -5.1183504 -1.129500 1.0002873 2669.678
## condaggressive 14.091142 1.606538 10.8049220 17.208217 1.0017486 1737.698
## condloud 28.126879 1.626135 24.8144346 31.273799 0.9995688 2408.063
## condfight 28.014680 1.781278 24.4961895 31.501380 1.0008683 2993.996
## sexM 3.925701 1.790795 0.4917277 7.571547 0.9996830 2067.544
## Tail_ESS
## Intercept 2640.793
## condsubmissive 3129.911
## condaggressive 2381.464
## condloud 3265.237
## condfight 2811.078
## sexM 2949.972
Thus: 14.1 [10.8, 17.2] dB louder in “aggressive” vs “base” and 3.1 [1.1, 5.1] dB quieter in “submissive” vs “base”
boxplot(pitch ~ cond, df)
boxplot(pitch ~ cond + sex, df)
A major sex difference in pitch, so we can definitely include sex as a covariate (if we don’t, it’s actually not such a disaster because the sex-related variance is also captured by the random intercepts per subject). We could also log-transform the pitch and think of reasonable priors, but we don’t really have to.
prior = c(
set_prior('normal(150, 50)', class = 'Intercept'),
set_prior('normal(0, 100)', class = 'b')
)
mod_pitch = brm(
pitch ~ cond + sex + (cond|subject),
data = df, family = 'gaussian',
warmup = 500, iter = 2000, chains = 3, cores = 3,
prior = prior,
file = 'files_practice/pitch~cond'
)
Check convergence:
summary(mod_pitch)
## Family: gaussian
## Links: mu = identity; sigma = identity
## Formula: pitch ~ cond + sex + (cond | subject)
## Data: df (Number of observations: 782)
## Draws: 3 chains, each with iter = 2000; warmup = 500; thin = 1;
## total post-warmup draws = 4500
##
## Group-Level Effects:
## ~subject (Number of levels: 36)
## Estimate Est.Error l-95% CI u-95% CI Rhat
## sd(Intercept) 6.04 4.81 0.22 18.06 1.00
## sd(condsubmissive) 76.67 12.47 56.27 104.43 1.00
## sd(condaggressive) 72.67 11.86 52.55 98.77 1.00
## sd(condloud) 168.77 23.23 129.20 219.03 1.00
## sd(condfight) 119.44 20.40 86.36 167.06 1.00
## cor(Intercept,condsubmissive) -0.09 0.40 -0.77 0.69 1.00
## cor(Intercept,condaggressive) 0.08 0.40 -0.69 0.79 1.01
## cor(condsubmissive,condaggressive) -0.29 0.19 -0.63 0.10 1.00
## cor(Intercept,condloud) 0.13 0.40 -0.69 0.81 1.01
## cor(condsubmissive,condloud) 0.04 0.21 -0.36 0.44 1.00
## cor(condaggressive,condloud) 0.30 0.20 -0.13 0.64 1.00
## cor(Intercept,condfight) 0.07 0.39 -0.68 0.76 1.02
## cor(condsubmissive,condfight) 0.22 0.19 -0.18 0.56 1.00
## cor(condaggressive,condfight) 0.44 0.17 0.07 0.73 1.00
## cor(condloud,condfight) 0.03 0.22 -0.39 0.44 1.00
## Bulk_ESS Tail_ESS
## sd(Intercept) 1790 2647
## sd(condsubmissive) 2296 3012
## sd(condaggressive) 1857 2891
## sd(condloud) 1907 3151
## sd(condfight) 1920 2828
## cor(Intercept,condsubmissive) 220 645
## cor(Intercept,condaggressive) 237 710
## cor(condsubmissive,condaggressive) 1700 2585
## cor(Intercept,condloud) 203 424
## cor(condsubmissive,condloud) 1815 2804
## cor(condaggressive,condloud) 1713 2447
## cor(Intercept,condfight) 250 458
## cor(condsubmissive,condfight) 2268 3199
## cor(condaggressive,condfight) 2369 2957
## cor(condloud,condfight) 2441 3156
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 216.19 6.54 203.28 228.86 1.00 3560 3139
## condsubmissive 55.34 16.75 22.09 87.10 1.00 1427 2457
## condaggressive 77.27 16.16 44.10 106.67 1.00 1256 1812
## condloud 212.06 31.01 148.97 270.85 1.00 2416 2752
## condfight 205.50 27.41 149.43 256.71 1.00 1652 1905
## sexM -101.26 10.45 -121.88 -80.71 1.00 2850 2755
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 51.06 1.38 48.48 53.90 1.00 5089 2949
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(mod_pitch)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.
plot(conditional_effects(mod_pitch), points = TRUE)
Difference between conditions
summary(mod_pitch)$fixed
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept 216.18785 6.537776 203.27754 228.85792 1.000337 3559.862
## condsubmissive 55.33748 16.745786 22.09187 87.09736 1.000427 1426.541
## condaggressive 77.27429 16.159872 44.09602 106.67291 1.001323 1256.445
## condloud 212.05693 31.010234 148.97281 270.85135 1.001073 2416.350
## condfight 205.49914 27.413642 149.43400 256.71223 1.000363 1651.716
## sexM -101.26035 10.447811 -121.87843 -80.71066 1.000963 2849.942
## Tail_ESS
## Intercept 3138.967
## condsubmissive 2456.992
## condaggressive 1811.586
## condloud 2752.479
## condfight 1905.329
## sexM 2754.733
Thus: 55 [22, 87] Hz higher pitch in “submissive” vs “base” and 77 [44, 107] Hz higher pitch in “aggressive” vs “base”
Possible text to report: “Controlling for the speaker’s sex, submissive vocalizations were more high-pitched (by 55 Hz [22, 87]) and quieter (by 3.1 dB [1.1, 5.1]) compared to baseline. Aggressive vocalizations were also more high-pitched than the baseline (by 77 Hz [44, 107]), but - unlike submissive vocalizations - they were much louder than baseline (by 14.1 dB [10.8, 17.2]).”
Extract fitted values
newdata = expand.grid(cond = levels(df$cond), sex = unique(df$sex))
fit_dB = fitted(mod_dB, newdata = newdata, re_formula = NA, robust = TRUE)
colnames(fit_dB) = c('fit', 'se', 'lwr', 'upr')
pl_dB = cbind(newdata, fit_dB)
pl_dB$cond2 = factor(pl_dB$cond, levels = c('base', 'submissive', 'aggressive', 'fight', 'loud'))
p1_dB = ggplot(pl_dB, aes(fit, cond, xmin = lwr, xmax = upr, shape = sex, color = sex)) +
geom_pointrange(position = position_dodge(.2)) +
xlab('SPL (dB)') +
ylab('') +
ggtitle('dB: fitted') +
theme_bw() +
theme(panel.grid = element_blank())
# p1_dB
Contrasts compared to “base” condition
cntr_dB = newdata[newdata$cond != 'base' &
newdata$sex == 'M',
# enough to show for one sex since there is no interaction
, drop = FALSE]
fit_dB2 = fitted(mod_dB, newdata = newdata, re_formula = NA, summary = FALSE)
idx_base = which(newdata$cond == 'base')
for (i in 1:nrow(cntr_dB)) {
d = fit_dB2[, which(newdata$cond == cntr_dB$cond[i])] - fit_dB2[, idx_base]
cntr_dB[i, c('fit', 'lwr', 'upr')] = quantile(d, probs = c(.5, .025, .975)) # median + 95% CI
cntr_dB$postProb[i] = mean(d > 0) # % of posterior distribution that is positive
}
cntr_dB$cond = factor(cntr_dB$cond, levels = c('submissive', 'aggressive', 'fight', 'loud'))
p2_dB = ggplot(cntr_dB, aes(fit, cond, xmin = lwr, xmax = upr)) +
geom_pointrange() +
geom_vline(xintercept = 0, linetype = 3) +
xlab('SPL (dB)') +
ylab('') +
ggtitle('dB: contrast') +
theme_bw() +
theme(panel.grid = element_blank())
# p2_dB
fit_pitch = fitted(mod_pitch, newdata = newdata, re_formula = NA, robust = TRUE)
colnames(fit_pitch) = c('fit', 'se', 'lwr', 'upr')
pl_pitch = cbind(newdata, fit_pitch)
pl_pitch$cond = factor(pl_pitch$cond, levels = c('base', 'submissive', 'aggressive', 'fight', 'loud'))
p1_pitch = ggplot(pl_pitch, aes(fit, cond, xmin = lwr, xmax = upr, shape = sex, color = sex)) +
geom_pointrange() +
scale_x_continuous(name = 'Pitch (Hz)', trans = 'log2') +
ylab('') +
ggtitle('Pitch: fitted') +
theme_bw() +
theme(panel.grid = element_blank(), axis.text.y = element_blank())
# p1_pitch
Contrasts from base
cntr_pitch = newdata[newdata$cond != 'base' &
newdata$sex == 'M',
# enough to show for one sex since there is no interaction
, drop = FALSE]
fit_pitch2 = fitted(mod_pitch, newdata = newdata, re_formula = NA, summary = FALSE)
idx_base = which(newdata$cond == 'base')
for (i in 1:nrow(cntr_pitch)) {
d = fit_pitch2[, which(newdata$cond == cntr_pitch$cond[i])] - fit_pitch2[, idx_base]
cntr_pitch[i, c('fit', 'lwr', 'upr')] = quantile(d, probs = c(.5, .025, .975))
cntr_pitch$postProb[i] = mean(d > 0)
}
cntr_pitch$cond = factor(cntr_pitch$cond, levels = c('submissive', 'aggressive', 'fight', 'loud'))
p2_pitch = ggplot(cntr_pitch, aes(fit, cond, xmin = lwr, xmax = upr)) +
geom_pointrange() +
geom_vline(xintercept = 0, linetype = 3) +
scale_x_continuous(name = 'Pitch (Hz)', trans = 'log2') +
ylab('') +
ggtitle('Pitch: contrast') +
theme_bw() +
theme(panel.grid = element_blank(), axis.text.y = element_blank())
# p2_pitch
Common plot: put multiple ggplot’s together with the “patchwork” package
library(patchwork)
p1_dB + p1_pitch +
p2_dB + p2_pitch +
plot_layout(nrow = 2, ncol = 2, byrow = TRUE)
## Warning: Transformation introduced infinite values in continuous x-axis
# ggsave('conds.png', width = 20, height = 12, units = 'cm', dpi = 600)
We can also get the contrasts between conditions with 95% CI and % of posterior probability
cntr_dB
## cond sex fit lwr upr postProb
## 7 submissive M -3.108641 -5.121652 -1.12835 0.0008888889
## 8 aggressive M 14.117974 10.798200 17.21316 1.0000000000
## 9 loud M 28.147472 24.809021 31.27399 1.0000000000
## 10 fight M 28.028506 24.492424 31.51625 1.0000000000
cntr_pitch
## cond sex fit lwr upr postProb
## 7 submissive M 55.68120 22.08594 87.12835 0.9997778
## 8 aggressive M 77.97644 44.09334 106.70834 1.0000000
## 9 loud M 213.00768 148.91122 270.93357 1.0000000
## 10 fight M 206.09240 149.39979 256.81564 1.0000000