In this iteration Rebecca did a deep dive into the references and pulled out those that had clear indications what is defined as Basal metabolic rate. The dataset now includes columns that indicate which ones were fulfilled for each species.
We also curated the data by body mass data so the species included have somewhat equal range for aquatic and terrestrial. All body masses below 10kg and above about 7000 kg were removed. We have also removed the diet information to focus on the BMR and derived quantities.
Predictors: body mass interacting with habitat.
mammal_bmr <- read_csv("data/rmrdata.csv",
show_col_types = FALSE)
Rename some variables and create species name from genus and species.
mammal_bmr <- mammal_bmr |>
rename(mass_kg = mass,
log10_mass = logmb,
log10_bmr = logrmr) |>
rename_with(tolower) |>
mutate(order = str_to_title(order),
family = str_to_title(family),
genus = str_to_title(genus),
animal = paste(genus, species)
)
Keep only variables we will be using. And “factor” “chr” variables.
mammal_bmr <- mammal_bmr |>
dplyr::select(order,
family,
genus,
species,
mass_kg,
log10_mass,
log10_bmr,
habitat,
animal
) |>
mutate(across(where(is.character), factor)) |>
arrange(order, genus, species)
A few quick graphs just to make sure the data are looking as we expect (error checking).
my_scatter_plot <- gf_point(log10_bmr ~ log10_mass | habitat,
color = ~order,
# size = ~parse_number(n_animals),
text = ~animal,
data = mammal_bmr,
alpha = 0.5) |>
gf_theme(legend.position = 'bottom',
legend.title = element_text(size = 8),
legend.text = element_text(size = 6)) |>
gf_theme(scale_color_viridis_d('Order')) |>
gf_labs(x = 'Log10(Mass (kg))',
y = 'Log10(BMR (kcal/day))')
my_scatter_plot
Figure 1.1:
plotly::ggplotly(my_scatter_plot) |>
plotly::layout(legend = list(#orientation = 'h',
font = list(size = 6)))
Figure 1.2:
Will include nested random effects of order/family (not genus/species due to lack of data; for example we have one row per species in the dataset). We are thus expecting similarity of observations based on a hierarchy of phylogenetic relatedness, but not in a specified/structured way; only groupings are used, with no sense of (for example) the fact that two orders are thought to be “further” apart than any other two.
# this code may generate warnings that are harmless
# https://stackoverflow.com/questions/67040472/warning-in-every-model-of-glmmtmb-givecsparse
re_model <- glmmTMB(log10_bmr ~ log10_mass * habitat +
(1 | order/family),
data = mammal_bmr)
tab_model(re_model)
log 10 bmr | |||
---|---|---|---|
Predictors | Estimates | CI | p |
(Intercept) | 2.04 | 1.77 – 2.30 | <0.001 |
log10 mass | 0.66 | 0.56 – 0.77 | <0.001 |
habitat [land] | -0.55 | -0.81 – -0.29 | <0.001 |
log10 mass × habitat [land] |
0.17 | 0.03 – 0.31 | 0.018 |
Random Effects | |||
σ2 | 0.01 | ||
τ00 family:order | 0.01 | ||
τ00 order | 0.03 | ||
ICC | 0.75 | ||
N family | 32 | ||
N order | 13 | ||
Observations | 63 | ||
Marginal R2 / Conditional R2 | 0.830 / 0.957 |
re_anova_results <- car::Anova(re_model)
names(re_anova_results)[3] <- 'p'
pander(re_anova_results)
Chisq | Df | p | |
---|---|---|---|
log10_mass | 323.4 | 1 | 2.654e-72 |
habitat | 20.74 | 1 | 5.27e-06 |
log10_mass:habitat | 5.618 | 1 | 0.01778 |
According to this model, the data provide very strong evidence that mass and habitat are associated with BMR (p = 2.7^{-72} and p = 5.3^{-6}), and moderate evidence of an interaction between body mass and habitat (p = 0.018).
Graphical checks below don’t provide evidence of any problems with the residual normality, residual independence, constant residual variance, and linearity conditions that have to be met for this model to be appropriate for the data.
re_ave_preds <- predict(re_model,
se.fit = TRUE,
re.form = ~0)
re_ind_preds <- predict(re_model,
se.fit = TRUE,
re.form = NULL)
mammal_bmr <- mammal_bmr |>
mutate(re_resids = resid(re_model),
re_ind_fitted = re_ind_preds$fit,
re_ave_fitted = re_ave_preds$fit,
re_ave_lo = re_ave_preds$fit - 1.96*re_ave_preds$se.fit,
re_ave_hi = re_ave_preds$fit + 1.96*re_ave_preds$se.fit)
# save fitted model and data
saveRDS(mammal_bmr, 'fitted-models/mr-data.RDS')
saveRDS(re_model, 'fitted-models/mr-re-model.RDS')
gf_point(re_resids ~ re_ind_fitted, data = mammal_bmr)
Figure 1.3:
acf(resid(re_model))
Figure 1.4:
gf_dhistogram(~re_resids, data = mammal_bmr, bins = 11) |>
gf_fitdistr()
Figure 1.5:
mammal_bmr <- mammal_bmr |>
mutate(Habitat = stringr::str_to_sentence(habitat))
re_preds <- gf_point(10^log10_bmr ~ 10^log10_mass,
color = ~Habitat, data = mammal_bmr,
# size = 0.5,
# alpha = 0.4,
text = ~animal) |>
gf_line(10^re_ave_fitted ~ 10^log10_mass,
color = ~Habitat,
data = mammal_bmr,
text = ~Habitat,
alpha = 1) |>
gf_ribbon(10^re_ave_lo + 10^re_ave_hi ~ 10^log10_mass,
color = ~Habitat, fill = ~Habitat,
text = ~Habitat,
alpha = 0.4) |>
gf_labs(x = 'Body Mass (kg)', y = 'BMR (kcal/day)') |>
gf_theme(scale_color_manual(values = my_colors)) |>
gf_theme(scale_fill_manual(values = my_colors)) |>
gf_refine(scale_x_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE)),
scale_y_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))
) |>
gf_theme(plot.margin = unit(c(1,1,1,1), "cm"))
# Kleiber
# BMR (Kleiber, 1947): BMR (kcal/day) = 70 * body mass (kg) ^0.75
pub_models <- data.frame(mass = seq(from = 9, by = 10, to = 10000)) |>
mutate(kleiber_bmr = 70*mass^0.75)
re_preds <- re_preds |>
gf_line(kleiber_bmr ~ mass, data = pub_models, color = 'black',
text = ~"Kleiber",
alpha = 0.4) |>
# c(0,0) corresponds to the “bottom left” and c(1,1) corresponds to the “top right” position
gf_theme(legend.position = c(0.9, 0.1),
legend.title = element_blank()) |>
gf_theme(plot.margin = unit(c(1,2,1,1), "cm"))
re_preds |> gf_theme(legend.position='none') |> ggplotly(tooltip = 'text')
Figure 1.6: Observed and predicted BMR as a function of mass. Lines are model predictions, points are observed data, and shaded areas are 95% confidence intervals. Colored lines are predictions from the mixed-effects model, and black line is based on Kleiber (1947).
re_preds |>
gf_labs(tag = "A")
gf_point(log10_bmr ~ re_ind_fitted, data = mammal_bmr,
alpha = 0.1) |>
gf_labs(y = 'Observed log10(BMR)',
x = 'Model-predicted, Order-and-Family-specific log10(BMR)',
title = 'Mixed-effects Model (RE of Order/Family)') |>
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
Figure 1.7:
Predictors: body mass interacting with habitat.
mammal_fr <- read_csv("data/frdata-filter.csv",
show_col_types = FALSE) |>
janitor::remove_empty(which = 'rows')
Rename some variables and create species name from genus and species.
mammal_fr <- mammal_fr |>
rename(mass_kg = mass,
log10_mass = logmb,
breaths_per_min = fr,
log10_br = logfr) |>
rename_with(tolower) |>
mutate(order = str_to_title(order),
family = str_to_title(family),
genus = str_to_title(genus),
animal = paste(genus, species))
Keep only variables we will be using. And factor()
“chr” variables.
mammal_fr <- mammal_fr |>
dplyr::select(order,
family,
genus,
species,
mass_kg,
log10_mass,
log10_br,
habitat,
animal
) |>
mutate(across(where(is.character), factor)) |>
arrange(order, family, genus, species)
A few quick graphs just to make sure the data are looking as we expect (error checking).
my_scatter_plot <- gf_point(log10_br ~ log10_mass | habitat,
color = ~order,
text = ~animal,
# size = ~parse_number(n_animals),
data = mammal_fr,
alpha = 0.5) |>
gf_theme(legend.position = 'bottom',
legend.title = element_text(size = 8),
legend.text = element_text(size = 6)) |>
gf_theme(scale_color_viridis_d('Order')) |>
gf_labs(x = 'Log10(Mass (kg))',
y = 'Log10(fR (breaths/min))')
my_scatter_plot
Figure 2.1:
plotly::ggplotly(my_scatter_plot) |>
plotly::layout(legend = list(#orientation = 'h',
font = list(size = 6)))
Figure 2.2:
Notes: If we wanted to use for web or publication, we would refine. We can edit what info is shown when you hover over a data point, change color scheme, legend, etc.
Will include nested random effects of order/family/genus/species, expecting similarity of observations based on a hierarchy of phylogenetic relatedness, but not in a specified/structured way; only groupings are used, with no sense of (for example) the fact that two orders are thought to be “further” apart than any other two.
re_model <- glmmTMB(log10_br ~ log10_mass * habitat + (1 | order/family/genus),
data = mammal_fr)
summary(re_model)
## Family: gaussian ( identity )
## Formula: log10_br ~ log10_mass * habitat + (1 | order/family/genus)
## Data: mammal_fr
##
## AIC BIC logLik deviance df.resid
## 35.6 54.4 -9.8 19.6 69
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## genus:family:order (Intercept) 0.03256 0.1804
## family:order (Intercept) 0.01296 0.1138
## order (Intercept) 0.05950 0.2439
## Residual 0.02935 0.1713
## Number of obs: 77, groups: genus:family:order, 63; family:order, 27; order, 7
##
## Dispersion estimate for gaussian family (sigma^2): 0.0293
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.41176 0.23698 5.957 2.56e-09 ***
## log10_mass -0.34198 0.08635 -3.960 7.48e-05 ***
## habitatland 0.10106 0.26289 0.384 0.701
## log10_mass:habitatland 0.17310 0.10871 1.592 0.111
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(re_model)
log 10 br | |||
---|---|---|---|
Predictors | Estimates | CI | p |
(Intercept) | 1.41 | 0.95 – 1.88 | <0.001 |
log10 mass | -0.34 | -0.51 – -0.17 | <0.001 |
habitat [land] | 0.10 | -0.41 – 0.62 | 0.701 |
log10 mass × habitat [land] |
0.17 | -0.04 – 0.39 | 0.111 |
Random Effects | |||
σ2 | 0.03 | ||
τ00 genus:family:order | 0.03 | ||
τ00 family:order | 0.01 | ||
τ00 order | 0.06 | ||
ICC | 0.78 | ||
N genus | 63 | ||
N family | 27 | ||
N order | 7 | ||
Observations | 77 | ||
Marginal R2 / Conditional R2 | 0.413 / 0.872 |
re_anova_results <- car::Anova(re_model)
names(re_anova_results)[3] <- 'p'
pander(re_anova_results)
Chisq | Df | p | |
---|---|---|---|
log10_mass | 16.24 | 1 | 5.582e-05 |
habitat | 20.76 | 1 | 5.211e-06 |
log10_mass:habitat | 2.536 | 1 | 0.1113 |
According to this model, the data provide strong evidence of mass and habitat being associated with different breathing frequency (p = 5.6^{-5} and p = 5.2^{-6}), but not of an interaction between habitat and mass (p = 0.11).
re_ave_preds <- predict(re_model,
se.fit = TRUE,
re.form = ~0)
re_ind_preds <- predict(re_model,
se.fit = TRUE,
re.form = NULL)
mammal_fr <- mammal_fr |>
mutate(re_resids = resid(re_model),
re_ind_fitted = re_ind_preds$fit,
re_ave_fitted = re_ave_preds$fit,
re_ave_lo = re_ave_preds$fit - 1.96*re_ave_preds$se.fit,
re_ave_hi = re_ave_preds$fit + 1.96*re_ave_preds$se.fit)
# save fitted model and data
saveRDS(mammal_fr, 'fitted-models/fr-data-filter.RDS')
saveRDS(re_model, 'fitted-models/fr-re-model-filter.RDS')
gf_point(re_resids ~ re_ind_fitted, data = mammal_fr)
Figure 2.3:
acf(resid(re_model))
Figure 2.4:
#gf_acf(~re_model)
gf_dhistogram(~re_resids, data = mammal_fr, bins = 11) |>
gf_fitdistr()
Figure 2.5:
mammal_fr <- mammal_fr |>
mutate(Habitat = stringr::str_to_sentence(habitat))
re_preds <- gf_point(10^log10_br ~ 10^log10_mass,
color = ~Habitat,
text = ~animal,
data = mammal_fr) |>
gf_line(10^re_ave_fitted ~ 10^log10_mass,
color = ~Habitat,
data = mammal_fr,
text = ~Habitat) |>
gf_ribbon(10^re_ave_lo + 10^re_ave_hi ~ 10^log10_mass,
color = ~Habitat, fill = ~Habitat,
text = ~Habitat) |>
gf_labs(x = 'Body Mass (kg)', y = 'Breathing frequency\n(breaths/minute)') |>
gf_theme(scale_color_manual(values = my_colors)) |>
gf_theme(scale_fill_manual(values = my_colors)) |>
gf_refine(scale_x_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE)),
scale_y_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE)))
# For breathing frequency, terrestrial (Stahl 1967): fR (breaths/minute) = 53.5 * body mass ^-0.26
# For breathing frequency, marine (Mortola, 2006): fR (breaths/minute) = 33 * body mass ^-0.42
pub_models <- data.frame(mass = seq(from = 9, by = 10, to = 10000)) |> # mass = seq(from = 0.015, by = 0.1, to = 42000)) |>
mutate(fr_stahl = 53.5*mass^-0.26,
fr_mortola = 33*mass^-0.42)
re_preds <- re_preds |>
gf_line(fr_stahl ~ mass, data = pub_models, color = 'black',
text = ~'Stahl',
alpha = 0.4) |>
gf_line(fr_mortola ~ mass, data = pub_models, color = 'black', linetype = 'dashed',
text = ~'Mortola',
alpha = 0.4) |>
# c(0,0) corresponds to the “bottom left” and c(1,1) corresponds to the “top right” position
gf_theme(legend.position = c(0.9, 0.9),
legend.title = element_blank()) |>
gf_theme(plot.margin = unit(c(1,2,1,1), "cm"))
re_preds |> gf_theme(legend.position='none') |> ggplotly(tooltip = 'text')
Figure 2.6: Observed and predicted breathing frequency as a function of mass and habitat. Lines are model predictions, points are observed data, and shaded areas are 95% confidence intervals. Colored lines are predictions from the mixed-effects model, green for terrestrial and blue for aquatic; black solid line is based on Stahl (1967) for terrestrial species, and dashed black line on Mortola (2006) for marine species.
gf_point(log10_br ~ re_ind_fitted, data = mammal_fr,
alpha = 0.1) |>
gf_labs(y = 'Observed log10(fr)',
x = 'Model-predicted, Genus-specific log10(fr)',
title = 'Mixed-effects Model (RE of Order/Family/Genus)') |>
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
Figure 2.7:
Note that out model tends to overestimate the breathing frequency for animals with lower breath rates, somewhat systematically, after accounting for effects of order/family/genus and habitat.
Predictors: body mass interacting with habitat.
mammal_hr <- read_csv("data/fhdata-filter.csv",
show_col_types = FALSE) |>
janitor::remove_empty(which = 'rows')
Rename some variables and create species name from genus and species.
mammal_hr <- mammal_hr |>
rename(mass_kg = mass,
log10_mass = logmb,
heart_rate_bpm = fh,
log10_hr = logfh) |>
rename_with(tolower) |>
mutate(order = str_to_title(order),
family = str_to_title(family),
genus = str_to_title(genus),
animal = paste(genus, species))
Keep only variables we will be using. And “factor” “chr” variables.
mammal_hr <- mammal_hr |>
dplyr::select(order,
family,
genus,
species,
mass_kg,
log10_mass,
log10_hr,
habitat,
animal
) |>
mutate(across(where(is.character), factor)) |>
arrange(order, family, genus, species)
A few quick graphs just to make sure the data are looking as we expect (error checking).
my_scatter_plot <- gf_point(log10_hr ~ log10_mass | habitat,
color = ~order,
text = ~animal,
# size = ~parse_number(n_animals),
data = mammal_hr,
alpha = 0.5) |>
gf_theme(legend.position = 'bottom',
legend.title = element_text(size = 8),
legend.text = element_text(size = 6)) |>
gf_theme(scale_color_viridis_d('Order')) |>
gf_labs(x = 'Log10(fH (beats/min))',
y = 'Log10(BMR (kcal/day))')
my_scatter_plot
Figure 3.1:
plotly::ggplotly(my_scatter_plot) |>
plotly::layout(legend = list(#orientation = 'h',
font = list(size = 6)))
Figure 3.2:
Notes: If we wanted to use for web or publication, we would refine. We can edit what info is shown when you hover over a data point, change color scheme, legend, etc.
Will include nested random effects of order/family, not having enough observations per genus and species to fit those random effects. We are thus expecting similarity of observations based on a hierarchy of phylogenetic relatedness, but not in a specified/structured way; only groupings are used, with no sense of (for example) the fact that two orders are thought to be “further” apart than any other two.
re_model <- glmmTMB(log10_hr ~ log10_mass * habitat + (1 | order/family),
data = mammal_hr)
summary(re_model)
## Family: gaussian ( identity )
## Formula: log10_hr ~ log10_mass * habitat + (1 | order/family)
## Data: mammal_hr
##
## AIC BIC logLik deviance df.resid
## -49.8 -37.7 31.9 -63.8 35
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## family:order (Intercept) 6.973e-03 8.350e-02
## order (Intercept) 9.107e-12 3.018e-06
## Residual 8.451e-03 9.193e-02
## Number of obs: 42, groups: family:order, 19; order, 7
##
## Dispersion estimate for gaussian family (sigma^2): 0.00845
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.18942 0.10032 21.824 < 2e-16 ***
## log10_mass -0.16026 0.04040 -3.967 7.27e-05 ***
## habitatland 0.01379 0.13899 0.099 0.921
## log10_mass:habitatland -0.02099 0.05587 -0.376 0.707
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(re_model)
log 10 hr | |||
---|---|---|---|
Predictors | Estimates | CI | p |
(Intercept) | 2.19 | 1.99 – 2.39 | <0.001 |
log10 mass | -0.16 | -0.24 – -0.08 | <0.001 |
habitat [land] | 0.01 | -0.26 – 0.29 | 0.921 |
log10 mass × habitat [land] |
-0.02 | -0.13 – 0.09 | 0.707 |
Random Effects | |||
σ2 | 0.01 | ||
τ00 family:order | 0.01 | ||
τ00 order | 0.00 | ||
N family | 19 | ||
N order | 7 | ||
Observations | 42 | ||
Marginal R2 / Conditional R2 | 0.627 / NA |
re_anova_results <- car::Anova(re_model)
pander(re_anova_results)
Chisq | Df | Pr(>Chisq) | |
---|---|---|---|
log10_mass | 39.76 | 1 | 2.867e-10 |
habitat | 0.5114 | 1 | 0.4745 |
log10_mass:habitat | 0.1412 | 1 | 0.7071 |
According to this model, the data don’t provide any evidence of an association between habitat and heart rate, after accounting for effect of mass.
Graphical checks below don’t provide evidence of any problems with the residual normality, residual independence, constant residual variance, and linearity conditions that have to be met for this model to be appropriate for the data.
re_ave_preds <- predict(re_model,
se.fit = TRUE,
re.form = ~0)
re_ind_preds <- predict(re_model,
se.fit = TRUE,
re.form = NULL)
mammal_hr <- mammal_hr |>
mutate(re_resids = resid(re_model),
re_ind_fitted = re_ind_preds$fit,
re_ave_fitted = re_ave_preds$fit,
re_ave_lo = re_ave_preds$fit - 1.96*re_ave_preds$se.fit,
re_ave_hi = re_ave_preds$fit + 1.96*re_ave_preds$se.fit)
# save fitted model and data
saveRDS(mammal_hr, 'fitted-models/hr-data-filter.RDS')
saveRDS(re_model, 'fitted-models/hr-re-model-filter.RDS')
gf_point(re_resids ~ re_ind_fitted, data = mammal_hr)
Figure 3.3:
acf(resid(re_model))
Figure 3.4:
#gf_acf(~re_model)
gf_dhistogram(~re_resids, data = mammal_hr, bins = 9) |>
gf_fitdistr()
Figure 3.5:
mammal_hr <- mammal_hr |>
mutate(Habitat = stringr::str_to_sentence(habitat))
re_preds <- gf_point(10^log10_hr ~ 10^log10_mass,
color = ~Habitat,
text = ~animal,
data = mammal_hr) |>
gf_line(10^re_ave_fitted ~ 10^log10_mass,
color = ~Habitat,
data = mammal_hr,
text = ~Habitat) |>
gf_ribbon(10^re_ave_lo + 10^re_ave_hi ~ 10^log10_mass,
color = ~Habitat, fill = ~Habitat,
text = ~Habitat) |>
gf_labs(x = 'Body Mass (kg)', y = 'Heart Rate (beats/minute)') |>
gf_theme(scale_color_manual(values = my_colors)) |>
gf_theme(scale_fill_manual(values = my_colors)) |>
gf_refine(scale_x_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE)
),
scale_y_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))
)
# For heart rate (Stahl 1967): fH (beats/minute)= 241 * body mass (kg) ^-0.25
pub_models <- data.frame(mass = seq(from = 9, by = 10, to = 10000)) |> # data.frame(mass = seq(from = 0.009, by = 0.1, to = 70000)) |>
mutate(hr_stahl = 241*mass^-0.25)
re_preds <- re_preds |>
gf_line(hr_stahl ~ mass, data = pub_models, color = 'black',
text = ~'Stahl',
alpha = 0.4) |>
# c(0,0) corresponds to the “bottom left” and c(1,1) corresponds to the “top right” position
gf_theme(legend.position = c(0.9, 0.9),
legend.title = element_blank()) |>
gf_theme(plot.margin = unit(c(1,2,1,1), "cm"))
re_preds |> gf_theme(legend.position='none') |> ggplotly(tooltip = 'text')
Figure 3.6: Observed and predicted heart rate as a function of mass and habitat. Lines are model predictions, points are observed data, and shaded areas are 95% confidence intervals. Colored lines are predictions from the mixed-effects model, green for terrestrial and blue for aquatic; black solid line is based on Stahl (1967).
gf_point(log10_hr ~ re_ind_fitted, data = mammal_hr,
alpha = 0.1) |>
gf_labs(y = 'Observed log10(fH)',
x = 'Model-predicted, Family-specific log10(fH)',
title = 'Mixed-effects Model (RE of Order/Family)') |>
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
Figure 3.7:
Again, we perhaps tend slightly toward overestimation for the lower heart rates and the opposite for the highest ones.
Predictors: body mass interacting with habitat.
mammal_sv <- read_csv("data/svdata-filter.csv",
show_col_types = FALSE) |>
janitor::remove_empty(which = 'rows')
Rename some variables and create species name from genus and species.
mammal_sv <- mammal_sv |>
rename(mass_kg = mass,
log10_sv = logsv,
log10_mass = logmb) |>
rename_with(tolower) |>
mutate(order = str_to_title(order),
family = str_to_title(family),
genus = str_to_title(genus),
animal = paste(genus, species))
Keep only variables we will be using. And factor()
“chr” variables.
mammal_sv <- mammal_sv |>
dplyr::select(order,
family,
genus,
species,
mass_kg,
log10_mass,
log10_sv,
habitat,
animal
) |>
mutate(across(where(is.character), factor)) |>
arrange(order, family, genus, species)
A few quick graphs just to make sure the data are looking as we expect (error checking).
my_scatter_plot <- gf_point(log10_sv ~ log10_mass | habitat,
color = ~order,
text = ~animal,
# size = ~parse_number(n_animals),
data = mammal_sv,
alpha = 0.5) |>
gf_theme(legend.position = 'bottom',
legend.title = element_text(size = 8),
legend.text = element_text(size = 6)) |>
gf_theme(scale_color_viridis_d('Order')) |>
gf_labs(x = 'Log10(Mass (kg))',
y = 'Log10(SV (ml))')
my_scatter_plot
Figure 4.1:
plotly::ggplotly(my_scatter_plot) |>
plotly::layout(legend = list(#orientation = 'h',
font = list(size = 6)))
Figure 4.2:
Notes: If we wanted to use for web or publication, we would refine. We can edit what info is shown when you hover over a data point, change color scheme, legend, etc.
Will include nested random effects of order/family (not having enough data to resolve genus/species differences), expecting similarity of observations based on a hierarchy of phylogenetic relatedness, but not in a specified/structured way; only groupings are used, with no sense of (for example) the fact that two orders are thought to be “further” apart than any other two.
re_model <- glmmTMB(log10_sv ~ log10_mass * habitat +
(1 | order),
data = mammal_sv)
summary(re_model)
## Family: gaussian ( identity )
## Formula: log10_sv ~ log10_mass * habitat + (1 | order)
## Data: mammal_sv
##
## AIC BIC logLik deviance df.resid
## -9.4 -5.2 10.7 -21.4 9
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## order (Intercept) 0.022698 0.15066
## Residual 0.007074 0.08411
## Number of obs: 15, groups: order, 5
##
## Dispersion estimate for gaussian family (sigma^2): 0.00707
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.00239 0.18738 0.013 0.990
## log10_mass 1.07350 0.06495 16.529 <2e-16 ***
## habitatland 0.20669 0.19620 1.053 0.292
## log10_mass:habitatland -0.07497 0.07959 -0.942 0.346
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(re_model)
log 10 sv | |||
---|---|---|---|
Predictors | Estimates | CI | p |
(Intercept) | 0.00 | -0.36 – 0.37 | 0.990 |
log10 mass | 1.07 | 0.95 – 1.20 | <0.001 |
habitat [land] | 0.21 | -0.18 – 0.59 | 0.292 |
log10 mass × habitat [land] |
-0.07 | -0.23 – 0.08 | 0.346 |
Random Effects | |||
σ2 | 0.01 | ||
τ00 order | 0.02 | ||
ICC | 0.76 | ||
N order | 5 | ||
Observations | 15 | ||
Marginal R2 / Conditional R2 | 0.953 / 0.989 |
re_anova_results <- car::Anova(re_model)
pander(re_anova_results)
Chisq | Df | Pr(>Chisq) | |
---|---|---|---|
log10_mass | 491.4 | 1 | 6.916e-109 |
habitat | 0.2656 | 1 | 0.6063 |
log10_mass:habitat | 0.8871 | 1 | 0.3463 |
According to this model, the data don’t provide any evidence of an association between habitat and stroke volume, after accounting for effect of mass.
Graphical checks below don’t provide evidence of any problems with the residual normality, residual independence, constant residual variance, and linearity conditions that have to be met for this model to be appropriate for the data.
re_ave_preds <- predict(re_model,
se.fit = TRUE,
re.form = ~0)
re_ind_preds <- predict(re_model,
se.fit = TRUE,
re.form = NULL)
mammal_sv <- mammal_sv |>
mutate(re_resids = resid(re_model),
re_ind_fitted = re_ind_preds$fit,
re_ave_fitted = re_ave_preds$fit,
re_ave_lo = re_ave_preds$fit - 1.96*re_ave_preds$se.fit,
re_ave_hi = re_ave_preds$fit + 1.96*re_ave_preds$se.fit)
gf_point(re_resids ~ re_ind_fitted, data = mammal_sv)
Figure 4.3:
# save fitted model and data
saveRDS(mammal_sv, 'fitted-models/sv-data-filter.RDS')
saveRDS(re_model, 'fitted-models/sv-re-model-filter.RDS')
acf(resid(re_model))
Figure 4.4:
gf_dhistogram(~re_resids, data = mammal_sv,
bins = 7) |>
gf_fitdistr()
Figure 4.5:
mammal_sv <- mammal_sv |>
mutate(Habitat = stringr::str_to_sentence(habitat))
re_preds <- gf_point(10^log10_sv ~ 10^log10_mass,
color = ~Habitat,
text = ~animal,
data = mammal_sv) |>
gf_line(10^re_ave_fitted ~ 10^log10_mass,
color = ~Habitat,
data = mammal_sv,
text = ~Habitat) |>
gf_ribbon(10^re_ave_lo + 10^re_ave_hi ~ 10^log10_mass,
color = ~Habitat, fill = ~Habitat,
text = ~Habitat) |>
gf_labs(x = 'Body Mass (kg)', y = 'Stroke Volume (mL/beat)') |>
gf_theme(scale_color_manual(values = my_colors)) |>
gf_theme(scale_fill_manual(values = my_colors)) |>
gf_refine(scale_x_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE)),
scale_y_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))
)
# For stroke volume (Stahl 1967, using prediction equations from Stahl for cardiac output (ml/min) and heat rate(beats/min)): SV (ml/beat) = [187 * body mass(kg)^0.81]/[241 * body mass (kg) ^-0.25]
pub_models <- data.frame(mass = seq(from = 9, by = 10, to = 10000)) |> # data.frame(mass = seq(from = 0.029, by = 0.1, to = 5000)) |>
mutate(sv_stahl = (187*mass^0.81) / (241 * mass^-0.25))
re_preds <- re_preds |>
gf_line(sv_stahl ~ mass, data = pub_models, color = 'black',
text = ~'Stahl',
alpha = 0.4) |>
# c(0,0) corresponds to the “bottom left” and c(1,1) corresponds to the “top right” position
gf_theme(legend.position = c(0.9, 0.1),
legend.title = element_blank()) |>
gf_theme(plot.margin = unit(c(1,2,1,1), "cm"))
re_preds |> gf_theme(legend.position='none') |> ggplotly(tooltip = 'text')
Figure 4.6: Observed and predicted stroke volume as a function of mass and habitat. Lines are model predictions, points are observed data, and shaded areas are 95% confidence intervals. Colored lines are predictions from the mixed-effects model, green for terrestrial and blue for aquatic; black solid line is based on Stahl (1967).
gf_point(log10_sv ~ re_ind_fitted, data = mammal_sv,
alpha = 0.1) |>
gf_labs(y = 'Observed log10(sv)',
x = 'Model-predicted, Order-specific log10(sv)',
title = 'Mixed-effects Model (RE of Order)') |>
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
Figure 4.7:
Predictors: body mass interacting with habitat.
mammal_vt <- read_csv("data/vtdata-filter.csv",
show_col_types = FALSE) |>
janitor::remove_empty(which = 'rows')
Rename some variables and create species name from genus and species.
mammal_vt <- mammal_vt |>
rename(mass_kg = mass,
log10_mass = logmb,
tidal_volume_ml = vt,
log10_tv = logvt) |>
rename_with(tolower) |>
mutate(order = str_to_title(order),
family = str_to_title(family),
genus = str_to_title(genus),
animal = paste(genus, species))
Keep only variables we will be using. And factor()
“chr” variables.
mammal_vt <- mammal_vt |>
dplyr::select(order,
family,
genus,
species,
mass_kg,
log10_mass,
log10_tv,
habitat,
animal
) |>
mutate(across(where(is.character), factor)) |>
arrange(order, family, genus, species)
A few quick graphs just to make sure the data are looking as we expect (error checking).
my_scatter_plot <- gf_point(log10_tv ~ log10_mass | habitat,
color = ~order,
text = ~animal,
# size = ~parse_number(n_animals),
data = mammal_vt,
alpha = 0.5) |>
gf_theme(legend.position = 'bottom',
legend.title = element_text(size = 8),
legend.text = element_text(size = 6)) |>
gf_theme(scale_color_viridis_d('Order')) |>
gf_labs(x = 'Log10(Mass (kg))',
y = 'Log10(Vt (ml))')
my_scatter_plot
Figure 5.1:
plotly::ggplotly(my_scatter_plot) |>
plotly::layout(legend = list(#orientation = 'h',
font = list(size = 6)))
Figure 5.2:
Will include nested random effects of order/family/genus (not having enough data to estimate genus/species effects), expecting similarity of observations based on a hierarchy of phylogenetic relatedness, but not in a specified/structured way; only groupings are used, with no sense of (for example) the fact that two orders are required to be “further” apart than any other two.
re_model <- glmmTMB(log10_tv ~ log10_mass * habitat + (1 | order/family),
data = mammal_vt)
summary(re_model)
## Family: gaussian ( identity )
## Formula: log10_tv ~ log10_mass * habitat + (1 | order/family)
## Data: mammal_vt
##
## AIC BIC logLik deviance df.resid
## -19.2 -10.4 16.6 -33.2 19
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## family:order (Intercept) 5.068e-03 7.119e-02
## order (Intercept) 3.792e-12 1.947e-06
## Residual 1.212e-02 1.101e-01
## Number of obs: 26, groups: family:order, 14; order, 6
##
## Dispersion estimate for gaussian family (sigma^2): 0.0121
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.14315 0.18742 6.099 1.07e-09 ***
## log10_mass 1.10415 0.07076 15.605 < 2e-16 ***
## habitatland 0.07459 0.24199 0.308 0.7579
## log10_mass:habitatland -0.21801 0.10180 -2.142 0.0322 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(re_model)
log 10 tv | |||
---|---|---|---|
Predictors | Estimates | CI | p |
(Intercept) | 1.14 | 0.78 – 1.51 | <0.001 |
log10 mass | 1.10 | 0.97 – 1.24 | <0.001 |
habitat [land] | 0.07 | -0.40 – 0.55 | 0.758 |
log10 mass × habitat [land] |
-0.22 | -0.42 – -0.02 | 0.032 |
Random Effects | |||
σ2 | 0.01 | ||
τ00 family:order | 0.01 | ||
τ00 order | 0.00 | ||
N family | 14 | ||
N order | 6 | ||
Observations | 26 | ||
Marginal R2 / Conditional R2 | 0.977 / NA |
re_anova_results <- car::Anova(re_model)
names(re_anova_results)[3] <- 'p'
pander(re_anova_results)
Chisq | Df | p | |
---|---|---|---|
log10_mass | 396 | 1 | 4.024e-88 |
habitat | 40.26 | 1 | 2.22e-10 |
log10_mass:habitat | 4.586 | 1 | 0.03223 |
According to this model, the data provide very strong evidence that mass and habitat are associated with tidal volume (p = 4^{-88} and p = 2.2^{-10}), and moderate evidence of an interaction between body mass and habitat (p = 0.032).
Graphical checks below don’t provide evidence of any problems with the residual normality, residual independence, constant residual variance, and linearity conditions that have to be met for this model to be appropriate for the data.
re_ave_preds <- predict(re_model,
se.fit = TRUE,
re.form = ~0)
re_ind_preds <- predict(re_model,
se.fit = TRUE,
re.form = NULL)
mammal_vt <- mammal_vt |>
mutate(re_resids = resid(re_model),
re_ind_fitted = re_ind_preds$fit,
re_ave_fitted = re_ave_preds$fit,
re_ave_lo = re_ave_preds$fit - 1.96*re_ave_preds$se.fit,
re_ave_hi = re_ave_preds$fit + 1.96*re_ave_preds$se.fit)
# save fitted model and data
saveRDS(mammal_vt, 'fitted-models/vt-data-filter.RDS')
saveRDS(re_model, 'fitted-models/vt-re-model-filter.RDS')
gf_point(re_resids ~ re_ind_fitted, data = mammal_vt)
Figure 5.3:
acf(resid(re_model))
Figure 5.4:
#gf_acf(~re_model)
gf_dhistogram(~re_resids, data = mammal_vt, bins = 7) |>
gf_fitdistr()
Figure 5.5:
mammal_vt <- mammal_vt |>
mutate(Habitat = stringr::str_to_sentence(habitat))
re_preds <- gf_point(10^log10_tv ~ 10^log10_mass,
color = ~Habitat,
text = ~animal,
data = mammal_vt) |>
gf_line(10^re_ave_fitted ~ 10^log10_mass,
color = ~Habitat,
data = mammal_vt,
text = ~Habitat) |>
gf_ribbon(10^re_ave_lo + 10^re_ave_hi ~ 10^log10_mass,
color = ~Habitat, fill = ~Habitat,
text = ~Habitat) |>
gf_labs(x = 'Body Mass (kg)', y = 'Tidal Volume (mL)') |>
gf_theme(scale_color_manual(values = my_colors)) |>
gf_theme(scale_fill_manual(values = my_colors)) |>
gf_refine(scale_x_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE)),
scale_y_continuous(trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE)))
# For tidal volume, terrestrial (Stahl 1967): SV (ml) = 7.69 * body mass^1.04
# For tidal volume, marine (Fahlman 2020, sea lion/cetacean papers): SV (l) = 0.0372 * body mass^0.97
pub_models <- data.frame(mass = seq(from = 9, by = 10, to = 10000)) |> # data.frame(mass = seq(from = 0.008, by = 0.1, to = 5200)) |>
mutate(vt_stahl = 7.69 * mass^1.04,
vt_fahlman = 37.2 * mass^0.97)
re_preds <- re_preds |>
gf_line(vt_stahl ~ mass, data = pub_models, color = 'black',
text = ~'Stahl',
alpha = 0.4) |>
gf_line(vt_fahlman ~ mass, data = pub_models, color = 'black',
linetype = 'dashed', text = ~'Fahlman',
alpha = 0.4) |>
# c(0,0) corresponds to the “bottom left” and c(1,1) corresponds to the “top right” position
gf_theme(legend.position = c(0.9, 0.1),
legend.title = element_blank()) |>
gf_theme(plot.margin = unit(c(1,2,1,1), "cm"))
re_preds |> gf_theme(legend.position='none') |> ggplotly(tooltip = 'text')
Figure 5.6: Observed and predicted tidal volume as a function of mass and habitat. Lines are model predictions, points are observed data, and shaded areas are 95% confidence intervals for expected (average) values. Colored lines are predictions from the mixed-effects model, green for terrestrial and blue for aquatic; black solid line is based on Stahl (1967) and black dashed line on Fahlman (2020).
gf_point(log10_tv ~ re_ind_fitted, data = mammal_vt,
alpha = 0.1) |>
gf_labs(y = 'Observed log10(Vt)',
x = 'Model-predicted, Order-and-Family-specific log10(VT)',
title = 'Mixed-effects Model (RE of Order/family)') |>
gf_abline(slope = 1, intercept = 0, color = 'black', linetype = 'dashed')
Figure 5.7:
Cardiac output is the product of heart rate (hr
) and stroke volume (sv
).
We will use a parametric bootstrap to make predictions of cardiac output as a function of log10(body mass) and habitat based on our random-effect models for hr
and sv
.
n_boot <- 1000
We will make sets of many (1000) predictions for each of the species that occur in both the hr
and sv
data sets.
# note NAMES HAVE TO BE SAME as passed to glmmTMB() when models were fitted (geesh)
mammal_hr <- readRDS('fitted-models/hr-data-filter.RDS')
hr_mod <- readRDS('fitted-models/hr-re-model-filter.RDS')
mammal_sv <- readRDS('fitted-models/sv-data-filter.RDS')
sv_mod <- readRDS('fitted-models/sv-re-model-filter.RDS')
cardiac_species <- intersect(pull(mammal_hr, animal), pull(mammal_sv, animal))
The two datasets have 12 species in common.
Make dataset for which to make predictions, containing these species.
cardiac_hr <- mammal_hr |>
dplyr::filter(animal %in% cardiac_species) |>
dplyr::select(animal, order, family, genus, species, log10_mass, log10_hr, habitat) |>
mutate(across(where(is.factor), factor))
cardiac_sv <- mammal_sv |>
dplyr::filter(animal %in% cardiac_species) |>
dplyr::select(animal, order, family, genus, species, log10_mass, log10_sv, habitat)
For each row of the dataset, make a set of 1000 model predictions of cardiac output. These predictions will include uncertainty in the model parameters (intercept and slopes) as well as variation attributed to order/genus/species. (Residual variation is not included). The mass estimates are different in the different datasets; here we use the mass estimate from the dataset to which the model (hr
or sv
) was originally fitted.
First we define custom functions to make the predictions
predict_hr <- function(.){
predict(.,
newdata = cardiac_hr,
re.form = NULL,
type = 'response')
}
predict_sv <- function(.){
predict(.,
newdata = cardiac_sv,
re.form = NULL,
type = 'response')
}
Do the bootstrap(s)
if (!file.exists('fitted-models/hr_boot_22-filter.RDS')){
hr_boot <- bootMer(hr_mod,
FUN = predict_hr,
nsim = n_boot,
re.form = NULL,
type = 'parametric'
)
saveRDS(hr_boot, 'fitted-models/hr_boot_22-filter.RDS')
}else{
hr_boot <- readRDS('fitted-models/hr_boot_22-filter.RDS')
}
if (!file.exists('fitted-models/sv_boot_22-filter.RDS')){
sv_boot <- bootMer(sv_mod,
FUN = predict_sv,
nsim = n_boot,
re.form = NULL,
type = 'parametric')
saveRDS(sv_boot, 'fitted-models/sv_boot_22-filter.RDS')
}else{
sv_boot <- readRDS('fitted-models/sv_boot_22-filter.RDS')
}
Add boot results to datasets, combine to one dataset, and compute predicted cardiac output (and its median and 95% bootstrap CI):
cardiac_hr <- cardiac_hr |>
mutate(boot_pred_hr = unlist(apply(t(hr_boot$t), 1, list), recursive = FALSE))
cardiac_sv <- cardiac_sv |>
mutate(boot_pred_sv = unlist(apply(t(sv_boot$t), 1, list), recursive = FALSE))
cardiac_preds <- inner_join(cardiac_hr, cardiac_sv,
by = c('animal', 'order', 'genus', 'species', 'habitat')) |>
mutate(boot_pred_cardiac = map2(boot_pred_hr, boot_pred_sv, function(.x, .y) mapply(prod, 10^.x, 10^.y))) |>
mutate(boot_median_cardiac = map_dbl(boot_pred_cardiac, median, na.rm = TRUE),
boot_lo = map_dbl(boot_pred_cardiac, quantile, probs = 0.025, na.rm = TRUE),
boot_hi = map_dbl(boot_pred_cardiac, quantile, probs = 0.975, na.rm = TRUE),
mean_mass = ((10^log10_mass.x + 10^log10_mass.y) / 2) )
# units: ml/stroke * beats/min
plot results
cardiac_mass <- gf_point(boot_median_cardiac ~ mean_mass, data = cardiac_preds,
color = ~habitat,
text = ~animal) |>
gf_errorbar(boot_lo + boot_hi ~ mean_mass, data = cardiac_preds,
color = ~habitat) |>
gf_labs(title = 'Expected Cardiac Output\nfor the species modeled',
y = 'Predicted Cardiac Output (mL/min)',
x = 'Body Mass (kg)') |>
gf_theme(scale_color_manual(values = my_colors))
ggplotly(cardiac_mass, tooltip = 'text')
Figure 6.1:
cardiac_mass
cardiac_preds <- cardiac_preds |>
mutate(Habitat = str_to_title(habitat) )
cardiac_mass_log <- gf_point(boot_median_cardiac ~ mean_mass, data = cardiac_preds,
color = ~Habitat,
text = ~animal) |>
gf_errorbar(boot_lo + boot_hi ~ mean_mass, data = cardiac_preds,
color = ~Habitat) |>
gf_lims(x = c(0.001, 10000)) |>
# add previous result from Stahl 1967
# Cardiac output (ml/min) = 187*Mb^0.81
gf_line(187*mean_mass^0.81 ~ mean_mass,
data = cardiac_preds,
inherit = FALSE,
alpha = 0.4) |>
gf_refine(scale_x_continuous(trans='log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))) |>
gf_refine(scale_y_continuous(trans='log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))) |>
gf_labs(# title = 'Expected Cardiac Output\nfor the species modeled',
y = 'Predicted Cardiac Output (mL/min)',
x = 'Body Mass (kg)') |>
gf_theme(scale_color_manual(values = my_colors)) |>
# c(0,0) corresponds to the “bottom left” and c(1,1) corresponds to the “top right” position
gf_theme(legend.position = c(0.9, 0.1),
legend.title = element_blank()) |>
gf_theme(plot.margin = unit(c(1,2,1,1), "cm"))
ggplotly(cardiac_mass_log, tooltip = 'text')
Figure 6.2: Predicted cardiac output as a function of mass and habitat. Colored dots with error bars are mean values and percentile-based 95 percent confidence intervals from parametric bootstrap. Black solid line is based on Stahl (1967).
cardiac_mass_log |>
gf_labs(tag = 'F')
Total ventilation is the product of breathing rate (fr
) and tidal volume (vt
).
We will use a parametric bootstrap to make predictions of total ventilation as a function of log10(body mass) and habitat based on our random-effect models for fr
and vt
.
We will make sets of many (1000) predictions for each of the species that occur in both the fr
and vt
data sets.
# note NAMES HAVE TO BE SAME as passed to glmmTMB() when models were fitted (geesh)
mammal_fr <- readRDS('fitted-models/fr-data-filter.RDS')
fr_mod <- readRDS('fitted-models/fr-re-model-filter.RDS')
mammal_vt <- readRDS('fitted-models/vt-data-filter.RDS')
vt_mod <- readRDS('fitted-models/vt-re-model-filter.RDS')
vent_species <- intersect(pull(mammal_fr, animal), pull(mammal_vt, animal))
The two datasets have 22 species in common.
Make datasets for which to make predictions, containing these species.
vent_fr <- mammal_fr |>
dplyr::filter(animal %in% vent_species) |>
dplyr::select(animal, order, family, genus, species, log10_mass, log10_br, habitat) |>
mutate(across(where(is.factor), factor))
vent_vt <- mammal_vt |>
dplyr::filter(animal %in% vent_species) |>
dplyr::select(animal, order, family, genus, species, log10_mass, log10_tv, habitat)
For each row of the dataset, make a set of 1000 model predictions of ventilation. These predictions will include uncertainty in the model parameters (intercept and slopes) as well as variation attributed to order/genus/species. (Residual variation is not included). The mass estimates are different in the different datasets; here we use the mass estimate from the dataset to which the model (fr
or vt
) was originally fitted.
First we define custom functions to make the predictions
predict_fr <- function(.){
predict(.,
newdata = vent_fr,
re.form = NULL,
type = 'response')
}
predict_vt <- function(.){
predict(.,
newdata = vent_vt,
re.form = NULL,
type = 'response')
}
Do the bootstrap(s)
if (!file.exists('fitted-models/fr_boot_22-filter.RDS')){
fr_boot <- bootMer(fr_mod,
FUN = predict_fr,
nsim = n_boot,
re.form = NULL,
type = 'parametric'
)
saveRDS(fr_boot, 'fitted-models/fr_boot_22-filter.RDS')
}else{
fr_boot <- readRDS('fitted-models/fr_boot_22-filter.RDS')
}
if (!file.exists('fitted-models/vt_boot_22-filter.RDS')){
vt_boot <- bootMer(vt_mod,
FUN = predict_vt,
nsim = n_boot,
re.form = NULL,
type = 'parametric')
saveRDS(vt_boot, 'fitted-models/vt_boot_22-filter.RDS')
}else{
vt_boot <- readRDS('fitted-models/vt_boot_22-filter.RDS')
}
Add boot results to datasets, combine to one dataset, and compute predicted ventilation (and its median and 95% bootstrap CI):
vent_fr <- vent_fr |>
mutate(boot_pred_fr = unlist(apply(t(fr_boot$t), 1, list), recursive = FALSE))
vent_vt <- vent_vt |>
mutate(boot_pred_vt = unlist(apply(t(vt_boot$t), 1, list), recursive = FALSE))
vent_preds <- inner_join(vent_fr, vent_vt,
by = c('animal', 'order', 'genus', 'species', 'habitat')) |>
mutate(boot_pred_vent = map2(boot_pred_fr, boot_pred_vt, function(.x, .y) mapply(prod, 10^.x, 10^.y))) |>
mutate(boot_median_vent = map_dbl(boot_pred_vent, median, na.rm = TRUE),
boot_lo = map_dbl(boot_pred_vent, quantile, probs = 0.025, na.rm = TRUE),
boot_hi = map_dbl(boot_pred_vent, quantile, probs = 0.975, na.rm = TRUE),
mean_mass = ((10^log10_mass.x + 10^log10_mass.y) / 2) )
# units: breaths/minute (?) * mL / breath ---> mL/min
plot results
vent_mass <- gf_point(boot_median_vent ~ mean_mass, data = vent_preds,
color = ~habitat,
text = ~animal) |>
gf_errorbar(boot_lo + boot_hi ~ mean_mass, data = vent_preds,
color = ~habitat) |>
gf_labs(title = 'Expected Ventilation\nfor the species modeled',
y = 'Predicted Ventilation (mL/min)',
x = 'Body Mass (kg)') |>
gf_theme(scale_color_manual(values = my_colors))
ggplotly(vent_mass, tooltip = 'text')
Figure 6.3:
vent_mass
vent_preds <- vent_preds |>
mutate(Habitat = str_to_title(habitat))
vent_mass_log <- gf_point(boot_median_vent ~ mean_mass, data = vent_preds,
color = ~Habitat,
text = ~animal) |>
gf_errorbar(boot_lo + boot_hi ~ mean_mass, data = vent_preds,
color = ~Habitat)|>
gf_lims(x = c(0.001, 10000)) |>
gf_refine(scale_x_log10(#trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE)),
scale_y_log10(labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))) |>
gf_labs(# title = 'Expected Ventilation\nfor the species modeled',
y = 'Predicted Minute Ventilation (mL/min)',
x = 'Body Mass (kg)') |>
gf_theme(scale_color_manual(values = my_colors)) |>
# add previous result from Stahl 1967
# minute volume: MV (ml/min)=379*Mb^0.80 (Mb=body mass in kg)
gf_line(379*mean_mass^0.80 ~ mean_mass, data = cardiac_preds,
colour = 'black', inherit = FALSE,
alpha = 0.4) |>
# c(0,0) corresponds to the “bottom left” and c(1,1) corresponds to the “top right” position
gf_theme(legend.position = c(0.9, 0.1),
legend.title = element_blank()) |>
gf_theme(plot.margin = unit(c(1,2,1,1), "cm"))
ggplotly(vent_mass_log, tooltip = 'text')
Figure 6.4: Predicted ventilation as a function of mass and habitat. Colored dots with error bars are mean values and percentile-based 95 percent confidence intervals from parametric bootstrap. Black solid line is based on Stahl (1967).
vent_mass_log |>
gf_labs(tag = 'G')
Cardiac output is the product of heart rate (hr
) and stroke volume (sv
).
We will use a parametric bootstrap to make predictions of cardiac output as a function of log10(body mass) and habitat based on our random-effect models for hr
and sv
. This time, we focus on the difference in predicted cardiac output for (hypothetical) pairs of species of a range of masses that differ only in their habitat.
rm(list = ls())
my_colors <- RColorBrewer::brewer.pal(3, "Paired")[2:3]
n_boot <- 1000
We will make sets of many (1000) predictions for each of the species that occur in both the hr
and sv
data sets.
# note NAMES HAVE TO BE SAME as passed to glmmTMB() when models were fitted (geesh)
mammal_hr <- readRDS('fitted-models/hr-data-filter.RDS')
hr_mod <- readRDS('fitted-models/hr-re-model-filter.RDS')
mammal_sv <- readRDS('fitted-models/sv-data-filter.RDS')
sv_mod <- readRDS('fitted-models/sv-re-model-filter.RDS')
Make dataset for which to make predictions, pairs of hypothetical species that range in size from about 10 to 5000 kg.
cardiac_hr <- cardiac_sv <- data.frame(numb = c(1:100),
mass_kg = c(seq(from = 10, to = 5000, by = 100),
seq(from = 10, to = 5000, by = 100)))
cardiac_hr <- cardiac_hr |>
mutate(log10_mass = log10(mass_kg),
habitat = c(rep('aquatic', 50),
rep('land', 50)),
animal = sample(mammal_hr$animal, size = 100, replace = TRUE),
order = sample(mammal_hr$order, size = 100, replace = TRUE),
family = sample(mammal_hr$family, size = 100, replace = TRUE),
genus = sample(mammal_hr$genus, size = 100, replace = TRUE),
species = sample(mammal_hr$species, size = 100, replace = TRUE),
log10_fr = 0) |>
dplyr::select(numb, animal, order, family, genus, species, log10_mass, log10_fr, habitat)
cardiac_sv <- cardiac_sv |>
mutate(log10_mass = log10(mass_kg),
habitat = c(rep('aquatic', 50),
rep('land', 50)),
order = sample(mammal_sv$order, size = 100, replace = TRUE),
family = sample(mammal_sv$family, size = 100, replace = TRUE),
genus = sample(mammal_sv$genus, size = 100, replace = TRUE),
species = sample(mammal_sv$species, size = 100, replace = TRUE),
log10_sv = 0) |>
dplyr::select(numb, log10_sv, log10_mass, habitat, order, family, genus, species)
For each row of the dataset, make a set of 1000 model predictions of cardiac output. These predictions will include uncertainty in the model parameters (intercept and slopes) as well as variation attributed to order/genus/species. (Residual variation is not included). The mass estimates are different in the different datasets; here we use the mass estimate from the dataset to which the model (hr
or sv
) was originally fitted.
First we define custom functions to make the predictions
predict_hr <- function(.){
predict(.,
newdata = cardiac_hr,
re.form = NA,
type = 'response')
}
predict_sv <- function(.){
predict(.,
newdata = cardiac_sv,
re.form = NA,
type = 'response',
allow.new.levels = TRUE)
}
Do the bootstrap(s)
if (!file.exists('fitted-models/hr_habitat_boot_22-filter.RDS')){
hr_boot <- bootMer(hr_mod,
FUN = predict_hr,
nsim = n_boot,
re.form = NA,
type = 'parametric'
)
saveRDS(hr_boot, 'fitted-models/hr_habitat_boot_22-filter.RDS')
}else{
hr_boot <- readRDS('fitted-models/hr_habitat_boot_22-filter.RDS')
}
if (!file.exists('fitted-models/sv_habitat_boot_22-filter.RDS')){
sv_boot <- bootMer(sv_mod,
FUN = predict_sv,
nsim = n_boot,
re.form = NA,
type = 'parametric')
saveRDS(sv_boot, 'fitted-models/sv_habitat_boot_22-filter.RDS')
}else{
sv_boot <- readRDS('fitted-models/sv_habitat_boot_22-filter.RDS')
}
Add boot results to datasets, combine to one dataset, and compute predicted cardiac output (and its median and 95% bootstrap CI):
cardiac_hr <- cardiac_hr |>
mutate(boot_pred_hr = unlist(apply(t(hr_boot$t), 1, list), recursive = FALSE),
across(where(is.factor), as.character))
cardiac_sv <- cardiac_sv |>
mutate(boot_pred_sv = unlist(apply(t(sv_boot$t), 1, list), recursive = FALSE),
across(where(is.factor), as.character))
cardiac_preds <- inner_join(cardiac_hr, cardiac_sv,
by = c("numb", "habitat")) |>
mutate(boot_pred_cardiac = map2(boot_pred_hr, boot_pred_sv, function(.x, .y) mapply(prod, 10^.x, 10^.y))) |>
mutate(boot_median_cardiac = map_dbl(boot_pred_cardiac, median, na.rm = TRUE),
boot_lo = map_dbl(boot_pred_cardiac, quantile, probs = 0.025, na.rm = TRUE),
boot_hi = map_dbl(boot_pred_cardiac, quantile, probs = 0.975, na.rm = TRUE))
# units: ml/stroke * beats/min
plot results
cardiac_mass <- gf_point(boot_median_cardiac ~ log10_mass.x, data = cardiac_preds,
color = ~habitat) |>
gf_errorbar(boot_lo + boot_hi ~ log10_mass.x, data = cardiac_preds,
color = ~habitat) |>
gf_labs(title = 'Expected Cardiac Output\nfor the species modeled',
y = 'Predicted Cardiac Output (mL/min)',
x = 'Body Mass (kg)') |>
gf_theme(scale_color_manual(values = my_colors))
ggplotly(cardiac_mass, tooltip = '')
Figure 7.1:
cardiac_mass
cardiac_mass_log <- gf_point(boot_median_cardiac ~ 10^log10_mass.x, data = cardiac_preds,
color = ~habitat,
text = ~animal) |>
gf_errorbar(boot_lo + boot_hi ~ 10^log10_mass.x, data = cardiac_preds,
color = ~habitat) |>
gf_lims(x = c(10, 10000)) |>
gf_refine(scale_x_continuous(trans='log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))) |>
gf_refine(scale_y_continuous(trans='log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))) |>
gf_labs(title = 'Expected Cardiac Output\nfor the species modeled',
y = 'Predicted Cardiac Output (mL/min)',
x = 'Body Mass (kg)') |>
gf_theme(scale_color_manual(values = my_colors))
ggplotly(cardiac_mass_log, tooltip = 'text')
Figure 7.2:
cardiac_mass_log
We’re also interested in judging whether the predicted cardiac output differs between terrestrial and aquatic species. For this, we may better consider the difference between (hypothetical) species that differ only in habitat.
cardiac_diffs <- cardiac_preds |>
select(log10_mass.x, habitat, boot_pred_hr, boot_pred_sv, boot_pred_cardiac) |>
pivot_wider(names_from = 'habitat',
values_from = c('boot_pred_cardiac', 'boot_pred_sv', 'boot_pred_hr')) |>
mutate(boot_diff_cardiac_tma = purrr::map2(boot_pred_cardiac_land,
boot_pred_cardiac_aquatic,
~ .x - .y)) |>
mutate(boot_median_cardiac_diff = map_dbl(boot_diff_cardiac_tma, median, na.rm = TRUE),
boot_cardiac_diff_lo = map_dbl(boot_diff_cardiac_tma, quantile, probs = 0.025, na.rm = TRUE),
boot_cardiac_diff_hi = map_dbl(boot_diff_cardiac_tma, quantile, probs = 0.975, na.rm = TRUE))
require(ggallin)
gf_line(boot_median_cardiac_diff ~ 10^log10_mass.x,
data = cardiac_diffs) |>
gf_ribbon(boot_cardiac_diff_lo + boot_cardiac_diff_hi ~ 10^log10_mass.x) |>
gf_refine(scale_x_continuous(trans='log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))) |>
# gf_refine(scale_y_continuous(trans=pseudolog10_trans,
# labels = scales::label_comma(accuracy = 0.001,
# drop0trailing = TRUE))) |>
gf_labs(title = 'Expected Cardiac Output Difference\nTerrestrial minus Aquatic',
y = 'Cardiac Output Difference (mL/min)',
x = 'Body Mass (kg)')
Figure 7.3:
gf_line(boot_median_cardiac_diff ~ 10^log10_mass.x,
data = cardiac_diffs) |>
gf_ribbon(boot_cardiac_diff_lo + boot_cardiac_diff_hi ~ 10^log10_mass.x) |>
gf_refine(scale_x_continuous(trans='log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))) |>
gf_refine(scale_y_continuous(trans=pseudolog10_trans,
breaks = c(-500000, -50000, -5000, -500, -50, 0, 50, 500, 5000, 50000, 500000),
labels = scales::label_comma(accuracy = 0.01,
drop0trailing = TRUE))) |>
gf_labs(title = 'Expected Cardiac Output Difference\nTerrestrial minus Aquatic',
y = 'Cardiac Output Difference (mL/min)',
x = 'Body Mass (kg)')
Figure 7.4:
Total ventilation is the product of breathing rate (fr
) and tidal volume (vt
).
We will use a parametric bootstrap to make predictions of total ventilation as a function of log10(body mass) and habitat based on our random-effect models for fr
and vt
.
We will make sets of many (1000) predictions for each of the species that occur in both the fr
and vt
data sets.
# note NAMES HAVE TO BE SAME as passed to glmmTMB() when models were fitted (geesh)
mammal_fr <- readRDS('fitted-models/fr-data-filter.RDS')
fr_mod <- readRDS('fitted-models/fr-re-model-filter.RDS')
mammal_vt <- readRDS('fitted-models/vt-data-filter.RDS')
vt_mod <- readRDS('fitted-models/vt-re-model-filter.RDS')
Make datasets for which to make predictions.
vent_fr <- vent_vt <- data.frame(numb = c(1:1182),
mass_kg = c(seq(from = 0.1, to = 100, by = 1),
seq(from = 100, to = 5000, by = 10),
seq(from = 0.1, to = 100, by = 1),
seq(from = 100, to = 5000, by = 10)))
vent_fr <- vent_fr |>
mutate(log10_mass = log10(mass_kg),
habitat = c(rep('aquatic', 591),
rep('land', 591)),
animal = sample(mammal_fr$animal, size = 1182, replace = TRUE),
order = sample(mammal_fr$order, size = 1182, replace = TRUE),
family = sample(mammal_fr$family, size = 1182, replace = TRUE),
genus = sample(mammal_fr$genus, size = 1182, replace = TRUE),
species = sample(mammal_fr$species, size = 1182, replace = TRUE),
log10_fr = 0) |>
dplyr::select(numb, order, family, genus, species, log10_mass, log10_fr, habitat)
vent_vt <- vent_vt |>
mutate(log10_mass = log10(mass_kg),
habitat = c(rep('aquatic', 591),
rep('land', 591)),
order = sample(mammal_vt$order, size = 1182, replace = TRUE),
family = sample(mammal_vt$family, size = 1182, replace = TRUE),
genus = sample(mammal_vt$genus, size = 1182, replace = TRUE),
species = sample(mammal_vt$species, size = 1182, replace = TRUE),
log10_sv = 0) |>
dplyr::select(numb, log10_sv, log10_mass, habitat, order, family, genus, species)
For each row of the dataset, make a set of 1000 model predictions of ventilation. These predictions will include uncertainty in the model parameters (intercept and slopes) as well as variation attributed to order/genus/species. (Residual variation is not included). The mass estimates are different in the different datasets; here we use the mass estimate from the dataset to which the model (fr
or vt
) was originally fitted.
First we define custom functions to make the predictions
predict_fr <- function(.){
predict(.,
newdata = vent_fr,
re.form = NA,
type = 'response')
}
predict_vt <- function(.){
predict(.,
newdata = vent_vt,
re.form = NA,
type = 'response')
}
Do the bootstrap(s)
if (!file.exists('fitted-models/fr_boot-diff_22-filter.RDS')){
fr_boot <- bootMer(fr_mod,
FUN = predict_fr,
nsim = n_boot,
re.form = NULL,
type = 'parametric'
)
saveRDS(fr_boot, 'fitted-models/fr_boot-diff_22-filter.RDS')
}else{
fr_boot <- readRDS('fitted-models/fr_boot-diff_22-filter.RDS')
}
if (!file.exists('fitted-models/vt_boot-diff_22-filter.RDS')){
vt_boot <- bootMer(vt_mod,
FUN = predict_vt,
nsim = n_boot,
re.form = NULL,
type = 'parametric')
saveRDS(vt_boot, 'fitted-models/vt_boot-diff_22-filter.RDS')
}else{
vt_boot <- readRDS('fitted-models/vt_boot-diff_22-filter.RDS')
}
Add boot results to datasets, combine to one dataset, and compute predicted ventilation (and its median and 95% bootstrap CI):
vent_fr <- vent_fr |>
mutate(boot_pred_fr = unlist(apply(t(fr_boot$t), 1, list), recursive = FALSE))
vent_vt <- vent_vt |>
mutate(boot_pred_vt = unlist(apply(t(vt_boot$t), 1, list), recursive = FALSE))
vent_preds <- inner_join(vent_fr, vent_vt,
by = c('numb', 'habitat')) |>
mutate(boot_pred_vent = map2(boot_pred_fr, boot_pred_vt, function(.x, .y) mapply(prod, 10^.x, 10^.y))) |>
mutate(boot_median_vent = map_dbl(boot_pred_vent, median, na.rm = TRUE),
boot_lo = map_dbl(boot_pred_vent, quantile, probs = 0.025, na.rm = TRUE),
boot_hi = map_dbl(boot_pred_vent, quantile, probs = 0.975, na.rm = TRUE))
# units: breaths/minute (?) * mL / breath ---> mL/min
plot results
vent_mass <- gf_point(boot_median_vent ~ 10^log10_mass.x, data = vent_preds,
color = ~habitat) |>
gf_errorbar(boot_lo + boot_hi ~ 10^log10_mass.x, data = vent_preds,
color = ~habitat) |>
gf_labs(title = 'Expected Ventilation',
y = 'Predicted Ventilation (mL/min)',
x = 'Body Mass (kg)') |>
gf_theme(scale_color_manual(values = my_colors))
ggplotly(vent_mass)
Figure 7.5:
vent_mass
vent_mass_log <- gf_point(boot_median_vent ~ 10^log10_mass.x, data = vent_preds,
color = ~habitat) |>
gf_errorbar(boot_lo + boot_hi ~ 10^log10_mass.x, data = vent_preds,
color = ~habitat)|>
gf_lims(x = c(10, 10000)) |>
gf_refine(scale_x_log10(#trans = 'log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE)),
scale_y_log10(labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))) |>
gf_labs(title = 'Expected Ventilation',
y = 'Predicted Minute Ventilation (mL/min)',
x = 'Body Mass (kg)') |>
gf_theme(scale_color_manual(values = my_colors))
ggplotly(vent_mass_log, tooltip = 'text')
Figure 7.6:
vent_mass_log
We’re also interested in judging whether the predicted ventilation differs between terrestrial and aquatic species. For this, we may better consider the difference between (hypothetical) species that differ only in habitat.
vent_diffs <- vent_preds |>
select(log10_mass.x, habitat, boot_pred_fr, boot_pred_vt, boot_pred_vent) |>
pivot_wider(names_from = 'habitat',
values_from = c('boot_pred_vent', 'boot_pred_fr', 'boot_pred_vt')) |>
mutate(boot_diff_vent_tma = purrr::map2(boot_pred_vent_land,
boot_pred_vent_aquatic,
~ .x - .y)) |>
mutate(boot_median_vent_diff = map_dbl(boot_diff_vent_tma, median, na.rm = TRUE),
boot_vent_diff_lo = map_dbl(boot_diff_vent_tma, quantile, probs = 0.025, na.rm = TRUE),
boot_vent_diff_hi = map_dbl(boot_diff_vent_tma, quantile, probs = 0.975, na.rm = TRUE))
require(ggallin)
gf_line(boot_median_vent_diff ~ 10^log10_mass.x,
data = vent_diffs) |>
gf_ribbon(boot_vent_diff_lo + boot_vent_diff_hi ~ 10^log10_mass.x) |>
gf_refine(scale_x_continuous(trans='log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))) |>
# gf_refine(scale_y_continuous(trans=pseudolog10_trans,
# labels = scales::label_comma(accuracy = 0.001,
# drop0trailing = TRUE))) |>
gf_labs(title = 'Expected Minute Ventilation Difference\nTerrestrial minus Aquatic',
y = 'Ventilation Difference (mL/min)',
x = 'Body Mass (kg)')
Figure 7.7:
gf_line(boot_median_vent_diff ~ 10^log10_mass.x,
data = vent_diffs) |>
gf_ribbon(boot_vent_diff_lo + boot_vent_diff_hi ~ 10^log10_mass.x) |>
gf_refine(scale_x_continuous(trans='log10',
labels = scales::label_comma(accuracy = 0.001,
drop0trailing = TRUE))) |>
gf_refine(scale_y_continuous(trans=pseudolog10_trans,
breaks = c(-500000, -50000, -5000, -500, -50, 0, 50, 500, 5000, 50000, 500000),
labels = scales::label_comma(accuracy = 0.01,
drop0trailing = TRUE))) |>
gf_labs(title = 'Expected Ventilation Difference\nTerrestrial minus Aquatic',
y = 'Ventilation Difference (mL/min)',
x = 'Body Mass (kg)')
Figure 7.8: