Preparation

Load the necessary libraries and functions

library(ggplot2)
library(brms)

load the data

df = read.csv('files_practice/loud_trials.csv', stringsAsFactors = TRUE)
summary(df)
##                            file        subject            cond    
##  a_ama_1_baseSelf_1_tc.wav   :  2   amc    : 42   aggressive:266  
##  a_ama_1_baseWhere_1_tc.wav  :  2   mtm    : 42   base      : 89  
##  a_ama_2_submissive_1_tc.wav :  2   clh    : 38   fight     :116  
##  a_ama_2_submissive_2_tc.wav :  2   did    : 38   loud      : 60  
##  a_ama_2_submissive_3_tc.wav :  2   mta    : 38   submissive:271  
##  a_ama_3_aggressive_1_t40.wav:  2   pam    : 38                   
##  (Other)                     :790   (Other):566                   
##      pitch              dB         sex    
##  Min.   :  72.0   Min.   : 40.14   F:552  
##  1st Qu.: 205.0   1st Qu.: 62.36   M:250  
##  Median : 267.0   Median : 72.37          
##  Mean   : 292.7   Mean   : 74.65          
##  3rd Qu.: 350.0   3rd Qu.: 85.12          
##  Max.   :1308.0   Max.   :115.85          
##  NA's   :20

Note the NAs - some missing values will be dropped when modeling.

A little trick: let’s set the reference level of “cond” to “base” to make it easier to interpret the regression coefficients. We could also drop “loud” and “fight” for the first analysis.

df$cond = factor(df$cond, levels = c('base', 'submissive', 'aggressive', 'loud', 'fight'))
table(df$cond)  # now "base" is the first level
## 
##       base submissive aggressive       loud      fight 
##         89        271        266         60        116

Question: How do voice pitch and loudness change when a person is trying to sound submissive or aggressive compared to normal, relaxed voice?

dB

A quick preview:

boxplot(dB ~ cond, df)  

boxplot(dB ~ sex, df) 

boxplot(dB ~ cond + sex, df)

Model structure: we have repeated observations from the same subject, so let’s at least fit random intercepts (1|subject), and we could also let the effect of condition vary per subject (random slopes). We can also control for sex, just to be on the safe side (although there are no major sex differences in loudness). I included some reasonable priors, but there is enough data here that we don’t have to worry about priors too much - the models will converge just fine with default flat priors. Note that file is not a clustering level because each recording occurs only once.

# check what priors are available for this type of model
# get_prior(dB ~ cond + (cond|subject), data = df, family = 'gaussian')
prior = c(
  set_prior('normal(80, 30)', class = 'Intercept'),  
  # 80 dB ~ loudness of voice in conversation
  set_prior('normal(0, 20)', class = 'b')            
  # expect no more than ±20-40 dB across conditions
)
mod_dB = brm(
  dB ~ cond + sex + (cond|subject),  
  data = df, family = 'gaussian', 
  warmup = 500, iter = 2000, chains = 3, cores = 3, 
  # sample_prior = 'only',  # good for checking what your prior predicts before seeing any data
  prior = prior,
  file = 'files_practice/dB~cond'
)

Check convergence:

plot(mod_dB)   # converged well

summary(mod_dB)  # converged well
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: dB ~ cond + sex + (cond | subject) 
##    Data: df (Number of observations: 802) 
##   Draws: 3 chains, each with iter = 2000; warmup = 500; thin = 1;
##          total post-warmup draws = 4500
## 
## Group-Level Effects: 
## ~subject (Number of levels: 36) 
##                                    Estimate Est.Error l-95% CI u-95% CI Rhat
## sd(Intercept)                          4.39      0.87     2.96     6.31 1.00
## sd(condsubmissive)                     4.54      0.96     2.91     6.64 1.00
## sd(condaggressive)                     7.59      1.24     5.49    10.34 1.00
## sd(condloud)                           8.26      1.39     5.93    11.38 1.00
## sd(condfight)                          8.78      1.43     6.42    11.91 1.00
## cor(Intercept,condsubmissive)         -0.06      0.24    -0.50     0.42 1.00
## cor(Intercept,condaggressive)         -0.25      0.20    -0.60     0.18 1.00
## cor(condsubmissive,condaggressive)    -0.12      0.22    -0.55     0.31 1.00
## cor(Intercept,condloud)               -0.08      0.22    -0.49     0.35 1.00
## cor(condsubmissive,condloud)          -0.14      0.23    -0.58     0.32 1.00
## cor(condaggressive,condloud)           0.50      0.17     0.13     0.77 1.00
## cor(Intercept,condfight)               0.00      0.22    -0.41     0.43 1.00
## cor(condsubmissive,condfight)         -0.39      0.21    -0.74     0.07 1.00
## cor(condaggressive,condfight)          0.34      0.19    -0.06     0.65 1.00
## cor(condloud,condfight)                0.65      0.15     0.29     0.87 1.00
##                                    Bulk_ESS Tail_ESS
## sd(Intercept)                          2676     3193
## sd(condsubmissive)                     2273     3078
## sd(condaggressive)                     1950     2840
## sd(condloud)                           1878     2632
## sd(condfight)                          2872     3172
## cor(Intercept,condsubmissive)          2050     2539
## cor(Intercept,condaggressive)          1397     2138
## cor(condsubmissive,condaggressive)     1369     2027
## cor(Intercept,condloud)                1665     2439
## cor(condsubmissive,condloud)           1414     2326
## cor(condaggressive,condloud)           2086     2961
## cor(Intercept,condfight)               1984     2790
## cor(condsubmissive,condfight)          1755     2596
## cor(condaggressive,condfight)          3016     3502
## cor(condloud,condfight)                2612     3662
## 
## Population-Level Effects: 
##                Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept         63.73      1.18    61.48    66.09 1.00     1616     2641
## condsubmissive    -3.11      1.01    -5.12    -1.13 1.00     2670     3130
## condaggressive    14.09      1.61    10.80    17.21 1.00     1738     2381
## condloud          28.13      1.63    24.81    31.27 1.00     2408     3265
## condfight         28.01      1.78    24.50    31.50 1.00     2994     2811
## sexM               3.93      1.79     0.49     7.57 1.00     2068     2950
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     3.88      0.11     3.68     4.10 1.00     5471     3205
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(mod_dB)  # looks fine
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

Plot with observed data to make sure the model makes sense:

plot(conditional_effects(mod_dB), points = TRUE)

Difference between conditions

summary(mod_dB)$fixed
##                 Estimate Est.Error   l-95% CI  u-95% CI      Rhat Bulk_ESS
## Intercept      63.732439  1.176199 61.4773963 66.094538 1.0006795 1616.240
## condsubmissive -3.109365  1.008379 -5.1183504 -1.129500 1.0002873 2669.678
## condaggressive 14.091142  1.606538 10.8049220 17.208217 1.0017486 1737.698
## condloud       28.126879  1.626135 24.8144346 31.273799 0.9995688 2408.063
## condfight      28.014680  1.781278 24.4961895 31.501380 1.0008683 2993.996
## sexM            3.925701  1.790795  0.4917277  7.571547 0.9996830 2067.544
##                Tail_ESS
## Intercept      2640.793
## condsubmissive 3129.911
## condaggressive 2381.464
## condloud       3265.237
## condfight      2811.078
## sexM           2949.972

Thus: 14.1 [10.8, 17.2] dB louder in “aggressive” vs “base” and 3.1 [1.1, 5.1] dB quieter in “submissive” vs “base”

Same for pitch

boxplot(pitch ~ cond, df)

boxplot(pitch ~ cond + sex, df)

A major sex difference in pitch, so we can definitely include sex as a covariate (if we don’t, it’s actually not such a disaster because the sex-related variance is also captured by the random intercepts per subject). We could also log-transform the pitch and think of reasonable priors, but we don’t really have to.

prior = c(
  set_prior('normal(150, 50)', class = 'Intercept'),  
  set_prior('normal(0, 100)', class = 'b')            
)
mod_pitch = brm(
  pitch ~ cond + sex + (cond|subject),   
  data = df, family = 'gaussian', 
  warmup = 500, iter = 2000, chains = 3, cores = 3,
  prior = prior,
  file = 'files_practice/pitch~cond'
)

Check convergence:

summary(mod_pitch)
##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: pitch ~ cond + sex + (cond | subject) 
##    Data: df (Number of observations: 782) 
##   Draws: 3 chains, each with iter = 2000; warmup = 500; thin = 1;
##          total post-warmup draws = 4500
## 
## Group-Level Effects: 
## ~subject (Number of levels: 36) 
##                                    Estimate Est.Error l-95% CI u-95% CI Rhat
## sd(Intercept)                          6.04      4.81     0.22    18.06 1.00
## sd(condsubmissive)                    76.67     12.47    56.27   104.43 1.00
## sd(condaggressive)                    72.67     11.86    52.55    98.77 1.00
## sd(condloud)                         168.77     23.23   129.20   219.03 1.00
## sd(condfight)                        119.44     20.40    86.36   167.06 1.00
## cor(Intercept,condsubmissive)         -0.09      0.40    -0.77     0.69 1.00
## cor(Intercept,condaggressive)          0.08      0.40    -0.69     0.79 1.01
## cor(condsubmissive,condaggressive)    -0.29      0.19    -0.63     0.10 1.00
## cor(Intercept,condloud)                0.13      0.40    -0.69     0.81 1.01
## cor(condsubmissive,condloud)           0.04      0.21    -0.36     0.44 1.00
## cor(condaggressive,condloud)           0.30      0.20    -0.13     0.64 1.00
## cor(Intercept,condfight)               0.07      0.39    -0.68     0.76 1.02
## cor(condsubmissive,condfight)          0.22      0.19    -0.18     0.56 1.00
## cor(condaggressive,condfight)          0.44      0.17     0.07     0.73 1.00
## cor(condloud,condfight)                0.03      0.22    -0.39     0.44 1.00
##                                    Bulk_ESS Tail_ESS
## sd(Intercept)                          1790     2647
## sd(condsubmissive)                     2296     3012
## sd(condaggressive)                     1857     2891
## sd(condloud)                           1907     3151
## sd(condfight)                          1920     2828
## cor(Intercept,condsubmissive)           220      645
## cor(Intercept,condaggressive)           237      710
## cor(condsubmissive,condaggressive)     1700     2585
## cor(Intercept,condloud)                 203      424
## cor(condsubmissive,condloud)           1815     2804
## cor(condaggressive,condloud)           1713     2447
## cor(Intercept,condfight)                250      458
## cor(condsubmissive,condfight)          2268     3199
## cor(condaggressive,condfight)          2369     2957
## cor(condloud,condfight)                2441     3156
## 
## Population-Level Effects: 
##                Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept        216.19      6.54   203.28   228.86 1.00     3560     3139
## condsubmissive    55.34     16.75    22.09    87.10 1.00     1427     2457
## condaggressive    77.27     16.16    44.10   106.67 1.00     1256     1812
## condloud         212.06     31.01   148.97   270.85 1.00     2416     2752
## condfight        205.50     27.41   149.43   256.71 1.00     1652     1905
## sexM            -101.26     10.45  -121.88   -80.71 1.00     2850     2755
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma    51.06      1.38    48.48    53.90 1.00     5089     2949
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
pp_check(mod_pitch)
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

plot(conditional_effects(mod_pitch), points = TRUE)

Difference between conditions

summary(mod_pitch)$fixed
##                  Estimate Est.Error   l-95% CI  u-95% CI     Rhat Bulk_ESS
## Intercept       216.18785  6.537776  203.27754 228.85792 1.000337 3559.862
## condsubmissive   55.33748 16.745786   22.09187  87.09736 1.000427 1426.541
## condaggressive   77.27429 16.159872   44.09602 106.67291 1.001323 1256.445
## condloud        212.05693 31.010234  148.97281 270.85135 1.001073 2416.350
## condfight       205.49914 27.413642  149.43400 256.71223 1.000363 1651.716
## sexM           -101.26035 10.447811 -121.87843 -80.71066 1.000963 2849.942
##                Tail_ESS
## Intercept      3138.967
## condsubmissive 2456.992
## condaggressive 1811.586
## condloud       2752.479
## condfight      1905.329
## sexM           2754.733

Thus: 55 [22, 87] Hz higher pitch in “submissive” vs “base” and 77 [44, 107] Hz higher pitch in “aggressive” vs “base”

Possible text to report: “Controlling for the speaker’s sex, submissive vocalizations were more high-pitched (by 55 Hz [22, 87]) and quieter (by 3.1 dB [1.1, 5.1]) compared to baseline. Aggressive vocalizations were also more high-pitched than the baseline (by 77 Hz [44, 107]), but - unlike submissive vocalizations - they were much louder than baseline (by 14.1 dB [10.8, 17.2]).”

BONUS: nice custom plots

Extract fitted values

newdata = expand.grid(cond = levels(df$cond), sex = unique(df$sex))
fit_dB = fitted(mod_dB, newdata = newdata, re_formula = NA, robust = TRUE)
colnames(fit_dB) = c('fit', 'se', 'lwr', 'upr')
pl_dB = cbind(newdata, fit_dB)
pl_dB$cond2 = factor(pl_dB$cond, levels = c('base', 'submissive', 'aggressive', 'fight', 'loud'))
p1_dB = ggplot(pl_dB, aes(fit, cond, xmin = lwr, xmax = upr, shape = sex, color = sex)) +
  geom_pointrange(position = position_dodge(.2)) +
  xlab('SPL (dB)') +
  ylab('') +
  ggtitle('dB: fitted') +
  theme_bw() +
  theme(panel.grid = element_blank())
# p1_dB

Contrasts compared to “base” condition

cntr_dB = newdata[newdata$cond != 'base' &
                    newdata$sex == 'M',  
                  # enough to show for one sex since there is no interaction
                  , drop = FALSE]
fit_dB2 = fitted(mod_dB, newdata = newdata, re_formula = NA, summary = FALSE)
idx_base = which(newdata$cond == 'base')
for (i in 1:nrow(cntr_dB)) {
  d = fit_dB2[, which(newdata$cond == cntr_dB$cond[i])] - fit_dB2[, idx_base]
  cntr_dB[i, c('fit', 'lwr', 'upr')] = quantile(d, probs = c(.5, .025, .975))  # median + 95% CI
  cntr_dB$postProb[i] = mean(d > 0)  # % of posterior distribution that is positive
}
cntr_dB$cond = factor(cntr_dB$cond, levels = c('submissive', 'aggressive', 'fight', 'loud'))
p2_dB = ggplot(cntr_dB, aes(fit, cond, xmin = lwr, xmax = upr)) +
  geom_pointrange() +
  geom_vline(xintercept = 0, linetype = 3) +
  xlab('SPL (dB)') +
  ylab('') +
  ggtitle('dB: contrast') +
  theme_bw() +
  theme(panel.grid = element_blank())
# p2_dB
fit_pitch = fitted(mod_pitch, newdata = newdata, re_formula = NA, robust = TRUE)
colnames(fit_pitch) = c('fit', 'se', 'lwr', 'upr')
pl_pitch = cbind(newdata, fit_pitch)
pl_pitch$cond = factor(pl_pitch$cond, levels = c('base', 'submissive', 'aggressive', 'fight', 'loud'))
p1_pitch = ggplot(pl_pitch, aes(fit, cond, xmin = lwr, xmax = upr, shape = sex, color = sex)) +
  geom_pointrange() +
  scale_x_continuous(name = 'Pitch (Hz)', trans = 'log2') +
  ylab('') +
  ggtitle('Pitch: fitted') +
  theme_bw() +
  theme(panel.grid = element_blank(), axis.text.y = element_blank())
# p1_pitch

Contrasts from base

cntr_pitch = newdata[newdata$cond != 'base' &
                    newdata$sex == 'M',  
                  # enough to show for one sex since there is no interaction
                  , drop = FALSE]
fit_pitch2 = fitted(mod_pitch, newdata = newdata, re_formula = NA, summary = FALSE)
idx_base = which(newdata$cond == 'base')
for (i in 1:nrow(cntr_pitch)) {
  d = fit_pitch2[, which(newdata$cond == cntr_pitch$cond[i])] - fit_pitch2[, idx_base]
  cntr_pitch[i, c('fit', 'lwr', 'upr')] = quantile(d, probs = c(.5, .025, .975))
  cntr_pitch$postProb[i] = mean(d > 0) 
}
cntr_pitch$cond = factor(cntr_pitch$cond, levels = c('submissive', 'aggressive', 'fight', 'loud'))
p2_pitch = ggplot(cntr_pitch, aes(fit, cond, xmin = lwr, xmax = upr)) +
  geom_pointrange() +
  geom_vline(xintercept = 0, linetype = 3) +
  scale_x_continuous(name = 'Pitch (Hz)', trans = 'log2') +
  ylab('') +
  ggtitle('Pitch: contrast') +
  theme_bw() +
  theme(panel.grid = element_blank(), axis.text.y = element_blank())
# p2_pitch

Common plot: put multiple ggplot’s together with the “patchwork” package

library(patchwork)
p1_dB + p1_pitch + 
  p2_dB + p2_pitch + 
  plot_layout(nrow = 2, ncol = 2, byrow = TRUE)
## Warning: Transformation introduced infinite values in continuous x-axis

# ggsave('conds.png', width = 20, height = 12, units = 'cm', dpi = 600)

We can also get the contrasts between conditions with 95% CI and % of posterior probability

cntr_dB
##          cond sex       fit       lwr      upr     postProb
## 7  submissive   M -3.108641 -5.121652 -1.12835 0.0008888889
## 8  aggressive   M 14.117974 10.798200 17.21316 1.0000000000
## 9        loud   M 28.147472 24.809021 31.27399 1.0000000000
## 10      fight   M 28.028506 24.492424 31.51625 1.0000000000
cntr_pitch
##          cond sex       fit       lwr       upr  postProb
## 7  submissive   M  55.68120  22.08594  87.12835 0.9997778
## 8  aggressive   M  77.97644  44.09334 106.70834 1.0000000
## 9        loud   M 213.00768 148.91122 270.93357 1.0000000
## 10      fight   M 206.09240 149.39979 256.81564 1.0000000