-
Notifications
You must be signed in to change notification settings - Fork 0
/
Create_Figure_1_and_Supplementary_Figs.Rmd
821 lines (623 loc) · 36.8 KB
/
Create_Figure_1_and_Supplementary_Figs.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
---
title: "R Notebook"
output: html_notebook
---
This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Cmd+Shift+Enter*.
```{r}
library(readxl)
library(ggplot2)
library(factoextra)
library(FactoMineR)
library('PCAtools')
library(ppcor)
library(boot)
library(ape)
library(ade4)
library("dplyr")
library(caper)
library(nlme)
library(gridExtra)
library('phytools')
library(colorspace)
library(ggpubr)
library(lemon)
library("gplots")
library(devtools)
library(ggtree)
library(cowplot)
library(colorspace)
library("ggplotify")
library('ComplexHeatmap')
```
### Calculate the logarithms of the life history traits and add them as new columns
```{r fig.width=4.5, fig.height=3}
## LOAD dataset
lht=read_excel('Supplementary Data/Supplementary Table 1.xlsx',sheet='1j - Data for analysis')
## Set species name as rowname for phylogenetic analysis later
d=lht
d=as.data.frame(d)
rownames(d)=gsub(' ','_',d$`Species name`)
d$`Species name`=gsub(' ','_',d$`Species name`)
colnames(d)=gsub(' ','_',colnames(d))
colnames(d)=lapply(colnames(d),function(x) paste0(tolower(substr(x,1,1)),substr(x,2,nchar(x))))
## Calculate log transformation of selected columns
cols_to_be_log_transformed=c('lifespan','somatic_mutation_rate','respiratory_rate','resting_heart_rate','adult_mass',
'female_sexual_maturity','male_sexual_maturity',
'mass_specific_BMR')
# Collect the log transformed column names in vector
log_transformed_cols=c()
for (col in cols_to_be_log_transformed){
log_colname=paste(c('log',col),collapse='_')
d[,log_colname]=log10(d[,col])
log_transformed_cols=append(log_colname,log_transformed_cols)
}
d[d$phylogenetic_grandorder_order=='Perissodactyla and Artiodactyla','phylogenetic_grandorder_order']='Perissodactyla &\nArtiodactyla'
```
## Read phylogenetic tree data
## 1. Read tree that has been constructed using mtDNA
## 2. Read multiple trees (1000 trees from vertlife.org) into a multi-phylo object and get a consensus tree
## 2.1 There ara 3 diffetent methods to build a consensus tree
## 3. Root trees in order to use them with caper's pgls function -> midpoint root method
```{r}
## Read the 10000 trees acquired from vertlife.org into multi-phylo object
multi_tree_10000_completed=read.nexus('10000_tip_dated_completed_5911_species.nex', force.multi = TRUE)
multi_tree_10000=multi_tree_10000_completed
## Check if any of the trees do not contain all 15 species and collect them
trees_to_drop=c()
for (name in names(multi_tree_10000)){
t=multi_tree_10000[[name]]
if (length(t$tip.label)<15){
trees_to_drop=append(tree_to_drop,name)
}
}
## Drop trees containing less then 15 species
multi_tree_10000=multi_tree_10000[!is.element(names(multi_tree_10000),trees_to_drop)]
## Create consensus trees. More on the methods http://blog.phytools.org/2016/03/method-to-compute-consensus-edge.html
## Rename Canis lupus (grey wolf) to Canis lupus familiaris (dog) to match species names of the dataset and tree
cons_tree_10000=consensus.edges(multi_tree_10000,method='least.squares')
cons_tree_10000$tip.label[grepl('Canis',cons_tree_10000$tip.label)]='Canis_lupus_familiaris'
```
## 1. Midpoint root the selected consensus tree for analysis with the caper package
## 2. Save previously created rooted consensus tree
```{r}
#trees=list(least_squares=cons_least_squares_10000,mean_edge=cons_mean_edge_10000)
tree=cons_tree_10000
## Root the selected tree for analysis
rooted_tree=midpoint.root(tree)
## Delete internal node names, as they overlap with tip-label names and cause an error with comparative-data function
rooted_tree$node.label=NULL
comp_dat=comparative.data(phy=rooted_tree, data=d, names.col=species_name, vcv = TRUE,warn.dropped = F,na.omit=F)
tree=rooted_tree
saveRDS(tree,file='rooted_tree.Rds')
```
## Load saved rooted tree and create comparative data for caper
```{r}
rooted_tree=readRDS(file='rooted_tree.Rds')
rooted_tree$node.label=NULL
comp_dat=comparative.data(phy=rooted_tree, data=d, names.col=species_name, vcv = TRUE,warn.dropped = F,na.omit=F)
tree=rooted_tree
```
### Create Figure 1
### 1.PLot phylogenetic tree of species with 3 main branches color coded
### 2. PLot heatmap of traits, where only the traits get clustered, the species have the same order as on the phylo. tree
### 3. PCA of traits with the main branches indicated
```{r fig.width=15.5,fig.height=11.5}
library("gplots")
library(ggtree)
library(cowplot)
library(colorspace)
library("ggplotify")
library('ComplexHeatmap')
## Initialize column names to include in Phyl.tree plot,Heatmap ,PCA
colnames_for_anal=c('log_lifespan','log_somatic_mutation_rate','log_respiratory_rate','log_resting_heart_rate','log_adult_mass',
'log_female_sexual_maturity','log_male_sexual_maturity','litter_size','log_mass_specific_BMR')
my_cols=qualitative_hcl(length(unique(d$phylogenetic_grandorder_order)),palette = "Dark 3")
## Delelete the '_' character from the names of the species
tree_for_ggplot=tree
tree_for_ggplot$tip.label=d[match(tree$tip.label,d$species_name),'common_name']
## Set the main branch label for the tree
branch_groups=list()
for (group in (unique(d$phylogenetic_grandorder_order))){
species_in_group=d[d$phylogenetic_grandorder_order==group,'common_name']
species_in_group=gsub('_',' ',species_in_group)
branch_groups[[group]]=unlist(species_in_group)
}
## Sort list by names
orders_grandorders=c('Carnivora','Perissodactyla &\nArtiodactyla','Rodentia','Lagomorpha','Primates')
branch_groups=branch_groups[orders_grandorders]
## Add manually the nodes of the different orders/grandorders for clade labelling with geom_cladelab later (mrca=most recent common ancestor)
mrca_node_numbers=c(20,18,26,28)
clade_df=data.frame(node=mrca_node_numbers,name=orders_grandorders[orders_grandorders!='Lagomorpha'],fill=my_cols[-4])
# Create new tree where groups are added to the tree itself
tree_for_ggplot <- groupOTU(tree_for_ggplot, branch_groups)
tree_for_ggplot_with_original_string<-ggtree(tree_for_ggplot)
tree_for_ggplot$tip.label[grepl('colobus',tree_for_ggplot$tip.label)]='Black-and-white\ncolobus'
tree_for_ggplot$tip.label[grepl('mole-rat',tree_for_ggplot$tip.label)]='Naked\nmole-rat'
tree_for_ggplot$tip.label[grepl('lemur',tree_for_ggplot$tip.label)]='Ring-tailed\nlemur'
subplot_label_fontsize=17
### PLOT PHYLOGENETIC TREE
treeplot=ggtree(tree_for_ggplot,layout='fan',size=1.5,open.angle = 160)+
geom_cladelab(data = clade_df,
mapping = aes(node = node, label = name, color =name),
fontsize = 12,fontface='bold',textcolour='black',offset.text=2,offset=50,barsize=5,angle='auto',align=FALSE)+
geom_hilight(data = clade_df,
mapping = aes(node = node, label = name, fill = name),show.legend= FALSE)+
geom_hilight(node=12,fill=my_cols[4])+
geom_tiplab(size=12,show.legend= FALSE,offset = 1,geom='text',lineheight=0.8)+
theme(plot.margin = unit(c(5,0,-12,3), "cm"),
legend.text = element_text(size=28,face='bold'),
legend.key.size = unit(1.9, 'cm'),
legend.position=c(0.95,0.5),
legend.title=element_text(size=33,face='bold'))+
scale_color_manual(name=NULL,values=c(my_cols,'black'), breaks=names(branch_groups),guide='none')+
scale_fill_manual(name=NULL,values=c(my_cols,'black'), breaks=names(branch_groups),guide='none')+
guides(override.aes = aes(label = ""),color = guide_legend(override.aes = list(linetype = 0, size=15)))
## Rotate tree and scale it up a little bit
treeplot=ggplotify::as.ggplot(treeplot, angle=-15, scale=1.15,hjust=0.05) +
annotate("text", label = "bold(a)", x = 0.1, y = 0.9,size=subplot_label_fontsize,parse=TRUE)
#### HEATMAP
## Reorder dataframe for heatmap visualization according to phyl.tree order
data_for_heatmap=d[match(get_taxa_name(tree_for_ggplot_with_original_string),d$common_name),colnames_for_anal]
rownames(data_for_heatmap)=gsub('_',' ',get_taxa_name(tree_for_ggplot_with_original_string))
## Rename specific metabolic rate, drop W_per_g from the end
colnames(data_for_heatmap)[grepl('spec_metabolic',colnames(data_for_heatmap))]='log_mass_specific_BMR'
## Reshape one string and add Female and Male symbol
rownames(data_for_heatmap)[grepl('colobus',rownames(data_for_heatmap))]='Black-and-white\ncolobus'
rownames(data_for_heatmap)[grepl('mole-rat',rownames(data_for_heatmap))]='Naked\nmole-rat'
rownames(data_for_heatmap)[grepl('lemur',rownames(data_for_heatmap))]='Ring-tailed\nlemur'
colnames(data_for_heatmap)[grepl('female',colnames(data_for_heatmap))]=paste0('log_time_to_sexual_maturity_','\U2640')
colnames(data_for_heatmap)[grepl('male',colnames(data_for_heatmap))]=paste0('log_time_to_sexual_maturity_','\U2642')
## Substitute underscore for space and capitalize the strings
colnames(data_for_heatmap)=gsub('_',' ',colnames(data_for_heatmap))
colnames(data_for_heatmap)=tools::toTitleCase(tolower(colnames(data_for_heatmap)))
colnames(data_for_heatmap)=gsub('Mass Specific Bmr','Mass-Specific BMR',colnames(data_for_heatmap))
## PLOT HEATMAP OF TRAITS
hmap=Heatmap((scale(data_for_heatmap)),
rect_gp = gpar(col = "black", lwd = 3),
color=colorRampPalette(c("navy", "white", "red"))(3),
cluster_rows=FALSE,
row_split=factor(d[match(get_taxa_name(tree_for_ggplot_with_original_string),d$common_name),'phylogenetic_grandorder_order'],
levels=orders_grandorders),
row_gap = unit(10, "mm"),
row_title=c(NULL),
row_names_gp = gpar(fontsize = 32), #add 'col=my_cols,' to the gpar() function color y labels
column_names_gp = gpar(fontsize = 32),
column_names_side = "top",
top_annotation =HeatmapAnnotation(ann=anno_empty(border = FALSE),
height = unit(2.3, "cm")),
row_names_side = "left",
column_dend_height = unit(30, "mm"),
column_dend_side = "bottom",
column_dend_gp=gpar(lwd=5),
width = ncol(data_for_heatmap)*unit(26, "mm"),
height = nrow(data_for_heatmap)*unit(24, "mm"),
column_names_rot = 50,
heatmap_legend_param = list(title='Scaled\nvalues\n',
title_gp = gpar(fontsize = 25,fontface='bold'),
title_position = "topcenter",
direction = "vertical",
labels_gp= gpar(fontsize = 20,fontface='bold'),
legend_height = unit(8, "cm"),
grid_width = unit(2, "cm")))
hmap=as.ggplot(hmap)+theme(plot.margin = unit(c(-5,0,-12,0),"cm"))+
annotate("text", label = "bold(b)", x = 0.2, y = 0.88,size=subplot_label_fontsize,parse=TRUE)
### PLOT PCA
## Extract data from dataframe containing all the data
d_num_for_pca=d[,colnames_for_anal]
## Substitute "_" for whitespace in the common animal names
rownames(d_num_for_pca)=gsub('_',' ',d[,'common_name'])
## Add newline chracter for animal names that are too long for one line
rownames(d_num_for_pca)[grepl('colobus',rownames(d_num_for_pca))]='Black-and-white\ncolobus'
rownames(d_num_for_pca)[grepl('mole-rat',rownames(d_num_for_pca))]='Naked\nmole-rat'
rownames(d_num_for_pca)[grepl('lemur',rownames(d_num_for_pca))]='Ring-tailed\nlemur'
## Calculate PCA results
res.pca=PCA((d_num_for_pca),graph=F)
# Extract PCA results as dataframe
pca_df=as.data.frame(res.pca$ind$coord[,c(1,2)],check.names = FALSE)
colnames(pca_df) <- gsub( "\\.","", colnames(pca_df))
pca_df['species_names']=rownames(pca_df)
## Add non-numeric data to dataframe for eventual coloring of PCA plot
d_num_for_pca['species_name']=rownames(d_num_for_pca)
d_num_for_pca[,'phylogenetic_grandorder_order']=d[,'phylogenetic_grandorder_order']
labels_for_pca=factor(d[,'phylogenetic_grandorder_order'],
levels=orders_grandorders)
pca_plot=ggplot(data=pca_df,aes_string(x='Dim1',y='Dim2',label='species_names'))+
geom_hline(yintercept=0,color = "grey",size = 1,linetype = 2)+
geom_vline(xintercept=0,color = "grey",size = 1,linetype = 2)+
geom_point(size = 9,aes(color=labels_for_pca))+
geom_text_repel(size = 13,lineheight = .8,box.padding = 1,min.segment.length = Inf,max.overlaps = 20)+
theme(text = element_text(size = 30),
legend.title=element_blank(),
legend.text=element_text(size=32),
legend.position=c(.88,.8),
legend.background = element_rect(colour = 'black', fill = 'white', size = 1),
axis.title.x=element_text(size=28,face='bold'),
axis.text.x=element_text(size=28,face='bold'),
axis.text.y=element_text(size=28,face='bold'),
axis.title.y=element_text(size=28,face='bold'),
panel.background = element_rect(fill = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.margin = unit(c(-8,0,0,0),"cm")
)+
scale_color_manual(name = '',labels = levels(labels_for_pca),values= my_cols)+
guides(color = guide_legend(override.aes = list(linetype = 0, size=9)))+
ggtitle('')
pca_plot=pca_plot + annotate("text", label = "bold(c)", x = -5.8, y =2,size=subplot_label_fontsize,parse=TRUE)
first_col=cowplot::plot_grid(treeplot,pca_plot,nrow = 2,rel_heights=c(0.6, 0.4))
whole_plot=cowplot::plot_grid(first_col,hmap,ncol=2,rel_heights=c(0.4, 0.6),rel_widths=c(1,1))
print(whole_plot)
ggsave(filename=('life_expectancy_figs/Figure 1.pdf'),plot=whole_plot,limitsize = FALSE)
```
#### Creation of Supplementary Figure 1 & 2:
#### Diagnostic plots of univariete and multivariate linear regressions with/without phylogenetic correction, in order to detect phylogenetic signal in the residuals
```{r fig.width=13, fig.height=7}
# function to extract legend from plot
get_only_legend <- function(plot) {
plot_table <- ggplot_gtable(ggplot_build(plot))
legend_plot <- which(sapply(plot_table$grobs, function(x) x$name) == "guide-box")
legend <- plot_table$grobs[[legend_plot]]
return(legend)
}
## Variables to consider
lm_colnames=c('log_somatic_mutation_rate','log_respiratory_rate','log_resting_heart_rate','log_adult_mass',
'log_female_sexual_maturity','log_male_sexual_maturity','litter_size','log_mass_specific_BMR')
## Dependent variables in linear model
dependent_vars=c('log_lifespan')
## Model types
models=c('univariate','multivariate')
## Create list with fixed independent variable names. This/These variable(s) will be used as a fixed independent variable(s) in the model alongside one additional independent variable that is added to the formula. If fixed independent variable==NULL, then only the additional independent variable as considered in the model
fixed_independent_varnames=list(NA,'log_somatic_mutation_rate')
## Create list for the letters used for labeling the subplots
subplot_letters=c('a','b','c','d','e','f','g','h')
n_of_figs=0
for (n in 1:length(models)){
## Fixed independent variables in linear model
fixed_independent_vars=fixed_independent_varnames[n]
## Select column names (=variables) to loop through -> drop the variables that are used for the model fitting
colnames_to_loop=lm_colnames[!is.element(lm_colnames,fixed_independent_vars)]
## Subset the subplot letters to the number of variables (number of subplots is equal to number of variables used as independent variables)
fig_subplot_letters=subplot_letters[1:length(colnames_to_loop)]
## Create list to collect the grid object of plots (3x2 subplots per object) into -> all the subplots of a figure
subplot_list=list()
for (k in 1:length(colnames_to_loop)){
## Set the variable that is used in modelling
var=colnames_to_loop[k]
subplot_letter=fig_subplot_letters[k]
## Create formula dependent_var ~ fixed_independent_vars + var
## If fixed_independent_vars==NA, then just consider dependent_vars~var in model, so the 'NULL' won't show as string later in figure subtitle
if (is.na(fixed_independent_vars)==TRUE){
fo=as.formula(paste(c(dependent_vars,var),collapse='~'))
} else {
fo=as.formula(paste(c(dependent_vars,c(paste(c(fixed_independent_vars,var),collapse = '+'))),collapse='~'))}
## Fit OLS linear model
lm_model=lm(formula=fo,data=d)
## Fit phylogenetic generalised linear model (PGLS) with a fixed lambda value=1
## -> This assumes a strong phylogenetic signal (==Brownian-motion evolution model of the traits)
pgls_model=pgls(fo,data = comp_dat, lambda = 1)
## Create list of models to loop through
model_list=list(lm_model,pgls_model)
names(model_list)=c("No phylogenetic signal (Pagel's lambda=0)","Strong phylogenetic signal (Pagel's lambda=1)")
## Create list to collect individual plots into
individual_plot_list=list()
## Subplots labels
subplot_labels=list(first_row=c('a','b','c'),second_row=c('d','e','f'))
for (j in 1:length(names(model_list))){
model_name=names(model_list)[j]
model=model_list[[model_name]]
subplot_row=names(subplot_labels)[j]
dd=data.frame(resid(model))
colnames(dd)='residuals'
dd[,'common_name']=d[base::match(rownames(dd),d$species_name),'common_name']
dd[,'phylogenetic_grandorder_order']=d[base::match(dd$common_name,d$common_name),'phylogenetic_grandorder_order']
dd[,'Fitted']=fitted(model)
dd[,'scaled_residuals']=scale(dd[,'residuals'])
dd[,'sqrt_abs_std_residuals']=sqrt(abs(scale(dd[,'residuals'])))
## Calcluate QQplot x and y axis values, and then add them to a dataframe in order to label the species afterwards in further plots
## as point labeling is not available in stat_qq
p=ggplot(data=dd,aes(sample=scaled_residuals)) + # Create QQplot with ggplot2 package
stat_qq()+
stat_qq_line()
## Save the x and y values from QQplot and add them to the data (order them as the data)
ordered_resid_df=ggplot_build(p)$data[[1]]
dd[,'x']=ordered_resid_df[order(match(ordered_resid_df$y,dd$scaled_residuals)),'x']
dd[,'y']=ordered_resid_df[order(match(ordered_resid_df$y,dd$scaled_residuals)),'y']
## Initialize plot text fontsizes
plot_title_size=20
axis_label_size=20
axis_text_size=17
legend_text_size=20
point_size=3
plot_text_size=6
subplot_label_x_coord=0.09
subplot_label_y_coord=0.95
my_cols=qualitative_hcl(length(unique(d$phylogenetic_grandorder_order)),palette = "Dark 3")
labels_for_legend=factor(d[,'phylogenetic_grandorder_order'],
levels=c('Carnivora','Ungulata','Rodentia','Lagomorpha','Primates'))
## Plot QQplot and save it to list
p1=ggplot(data=dd,aes_string(x='x',y='y',color='phylogenetic_grandorder_order'))+geom_point(size=point_size)+
geom_text_repel(label = dd$common_name,size=plot_text_size)+
ggtitle(paste('QQplot','\n',model_name,sep=' '))+
stat_qq_line(aes_string(sample='y'),col = "black")+
theme(plot.title = element_text(size=plot_title_size,face='bold',hjust = 0.5),
axis.text.x=element_text(size=axis_text_size),
axis.title.x=element_text(size=axis_label_size),
axis.text.y=element_text(size=axis_text_size),
axis.title.y=element_text(size=axis_label_size))+
xlab('Theoretical quantiles')+ylab('Sample quantiles')+
scale_color_manual(name = '',breaks=levels(labels_for_legend),values= my_cols)+
labs(tag=subplot_labels[[subplot_row]][1])+
theme(legend.position="none",
plot.tag = element_text(face = 'bold',size=axis_label_size*2),
plot.tag.position = c(subplot_label_x_coord, subplot_label_y_coord))
individual_plot_list[paste(var,model_name,'qqplot',sep='_')]=list(p1)
## PLot residual plot and save it to a list
p2=ggplot(data=dd,aes(x=Fitted,y=scaled_residuals,color=phylogenetic_grandorder_order))+
geom_point(size=point_size,show.legend = FALSE)+
geom_text_repel(label = dd$common_name,size=plot_text_size)+
Hmisc::stat_plsmo(aes(x=Fitted,y=residuals),inherit.aes=FALSE)+
ggtitle(paste('Residual plot','\n',model_name,sep=' '))+
theme(plot.title = element_text(size=plot_title_size,face='bold',hjust = 0.5),
axis.text.x=element_text(size=axis_text_size),
axis.title.x=element_text(size=axis_label_size),
axis.text.y=element_text(size=axis_text_size),
axis.title.y=element_text(size=axis_label_size))+
scale_color_manual(name = '',breaks=levels(labels_for_legend),values= my_cols)+
labs(tag=subplot_labels[[subplot_row]][2])+
theme(legend.position="none",
plot.tag = element_text(face = 'bold',size=axis_label_size*2),
plot.tag.position = c(subplot_label_x_coord, subplot_label_y_coord))+
ylim(c(-2.5,2.5))+
ylab('Scaled residuals')
individual_plot_list[paste(var,model_name,'residual_plot',sep='_')]=list(p2)
## PLot Scale-location plot
p3=ggplot(data=dd,aes(x=Fitted,y=sqrt_abs_std_residuals,color=phylogenetic_grandorder_order))+
geom_point(size=point_size)+
geom_text_repel(label = dd$common_name,size=plot_text_size)+
Hmisc::stat_plsmo(aes(x=Fitted,y=sqrt_abs_std_residuals),inherit.aes=FALSE)+
ggtitle(paste('Scale-location','\n',model_name,sep=' '))+
theme(plot.title = element_text(size=plot_title_size,face='bold',hjust = 0.5),
axis.text.x=element_text(size=axis_text_size),
axis.title.x=element_text(size=axis_label_size),
axis.text.y=element_text(size=axis_text_size),
axis.title.y=element_text(size=axis_label_size))+
scale_color_manual(name = '',breaks=levels(labels_for_legend),values= my_cols)+
labs(tag=subplot_labels[[subplot_row]][3])+
theme(legend.position="none",
plot.tag = element_text(face = 'bold',size=axis_label_size*2),
plot.tag.position = c(subplot_label_x_coord*1.3, subplot_label_y_coord))+
ylim(c(0,2))+
ylab(bquote(sqrt('|Scaled residuals|')))+
theme(legend.position="none")
individual_plot_list[paste(var,model_name,'scale_location_plot',sep='_')]=list(p3)
## Create ggplot object just to extract its legend
plot_for_legend=ggplot(data=dd,aes(x=Fitted,y=residuals))+
geom_point(aes(color=phylogenetic_grandorder_order))+
theme(legend.text=element_text(size=legend_text_size,face='bold'))+
scale_color_manual(name = '',labels = levels(labels_for_legend),values= my_cols)+
guides(fill=guide_legend(title=''),
color = guide_legend(override.aes = list(size = 5)))
## Extract legend
combined_legend=get_only_legend(plot_for_legend)
}
## Create grid of 6 plots
plots=cowplot::plot_grid(plotlist = individual_plot_list,ncol = 3)
## Add legend to the plot
plots_with_legend=cowplot::plot_grid(plots,combined_legend,ncol=2,rel_widths = c(1, .087))
## Create title to subplot
title_string=tools::toTitleCase(tolower(gsub('_',' ',format(fo))))
title_string=gsub('Mass Specific Bmr','Mass-Specific BMR',title_string)
subplot_title <- ggdraw() + draw_label(title_string, size = 35,fontface='bold') +
theme(plot.background = element_rect(fill="white", color = NA))
## Add title to subplot
plots_with_legend_title=cowplot::plot_grid(subplot_title, plots_with_legend, ncol=1, rel_heights=c(0.06, 0.99),scale = 1)
print(plots_with_legend_title)
fig_name=paste('Supplementary Figure',n_of_figs+k)
ggsave(filename=paste0('life_expectancy_figs/',fig_name,'.pdf'),plot=plots_with_legend_title,limitsize = FALSE,dpi = 300,bg='white')
ggsave(filename=paste0('life_expectancy_figs/',fig_name,'.png'),plot=plots_with_legend_title,limitsize = FALSE,dpi = 300,bg='white')
}
n_of_figs=k
}
```
#### 1. Create bootstrapped dataset from original dataset
#### 2. Calculate adjusted R^2 values for the bootstrapped datasets
### 3. Calculate Pearson correlations for the boostrapped datasetto ompare Pearson's R's P-value distributions between traits
```{r}
library(broom)
### Names of the variables we want to include in the analysis
partial_corr_colnames=c('log_lifespan','log_somatic_mutation_rate','log_respiratory_rate','log_resting_heart_rate','log_adult_mass',
'log_female_sexual_maturity','log_male_sexual_maturity','litter_size',
'log_mass_specific_BMR')
### Determine if phylogenetic correction is wanted
phylogenetic_correction='no'
### Select variables to regress out
var_to_regress_out=c('log_somatic_mutation_rate')
### Select variable to correlate with (=variable on the y-axis)
var_to_correlate_with=c('log_lifespan')
## Create list of variables to loop through -> drop the variables that are used in regression
vars_to_loop_through=partial_corr_colnames[!is.element(partial_corr_colnames,c(var_to_regress_out,var_to_correlate_with))]
#vars_to_loop_through=c('log_mass_specific_BMR','log_heart_rate')
## Created nested list for collecting the results of bootstrapping
bootstrapped_results=list()
result_names_list=c('Pearson p-value','adj. Pearson p-value')
for (i in 1:length(vars_to_loop_through)) {
bootstrapped_results[[vars_to_loop_through[i]]] = replicate(length(result_names_list), list(), simplify = FALSE)
names(bootstrapped_results[[vars_to_loop_through[i]]])=result_names_list
}
## Create bootsrapped dataset
bootstrapped_samples <- list()
num_samples <- 1000
r_squared_mutation_only <- c()
r_squared_mutation_heart <- c()
set.seed(1)
for (i in 1:num_samples) {
bootstrapped_sample <- sample_n(d, size = nrow(d), replace = TRUE)
bootstrapped_samples[[i]] <- bootstrapped_sample
}
### Loop over bootstrapped dataset and calculate the Partial correlation for each dataset and collect the results
for (bootstrapped_d in bootstrapped_samples){
##### CALULCATION OF R^2 COMPARISON
## Calculate the adjusted R^2 values of the 2 models and save them in vectors
somatic_fit <- lm((log_lifespan) ~ (log_somatic_mutation_rate), data = bootstrapped_d)
stats_somatic_fit <- glance(somatic_fit)
r_squared_mutation_only <- c(r_squared_mutation_only, stats_somatic_fit$adj.r.squared)
somatic_heart_fit <- lm((log_lifespan) ~ (log_resting_heart_rate) + (log_somatic_mutation_rate), data = bootstrapped_d)
stats_somatic_heart <- glance(somatic_heart_fit)
r_squared_mutation_heart <- c(r_squared_mutation_heart, stats_somatic_heart$adj.r.squared)
###### CALCULATION OF PEARSON CORRELATION
## Initialize dataframe to collect the partial pearson correlation results into if no phylogenetic correction
partial_pearson_corr_result_df=data.frame(matrix(NA,ncol=length(result_names_list),nrow=length(vars_to_loop_through),
dimnames=list(vars_to_loop_through,result_names_list)))
colnames(partial_pearson_corr_result_df)=result_names_list
### Loop through remaining variables and perform the steps described above in points 3-5.
for (var in vars_to_loop_through){
# Drop datapoints that are NAN and select only numeric columns + necessary metadata:
# Species common name + nutrition + litter_size + body_temperature
df=bootstrapped_d[!is.na(bootstrapped_d[,var]),]
df=as.data.frame(df)
df=df[,c(partial_corr_colnames,'common_name','species_name')]
### Create linear models in order to regress out variables
# Create formula necessary for regressing out 'var_to_regress_out'
vars_to_regress_out_form=paste(var_to_regress_out,collapse='+')
form_regress_out=as.formula(paste(c(var_to_correlate_with,vars_to_regress_out_form),collapse='~'))
# Create formula necessary for regressing out 'var'
form_var=as.formula(paste(c(var,vars_to_regress_out_form),collapse='~'))
# Check if phylogenetic correction is yes/no
if (phylogenetic_correction=='no'){
## Execute partial correlation function and save + print result
## If during the calculation of correlations unreliable R or p-values are calculated due to encountering singularity in the
# variance-covariance matrix, set those R and p-values as NaNs
# Pearson partial correlation
possibleError=tryCatch({pearson_res=(pcor.test(df[,var_to_correlate_with],df[,var],df[,var_to_regress_out],method='pearson'))
pearson_p_value=pearson_res$p.value
},error=function(e){return(e)},
warning=function(w){return(w)})
if(inherits(possibleError, "error")|inherits(possibleError, "warning")){
pearson_p_value=NA}
## Add results to dataframe
partial_pearson_corr_result_df[var,]=c(pearson_p_value,NA)
# Crate title of plot
plot_title=paste(c('Corrected for:',paste(var_to_regress_out,collapse='+')),collapse=' ')
}
}
partial_pearson_corr_result_df[,'adj. Pearson p-value']=p.adjust(partial_pearson_corr_result_df[,'Pearson p-value'],method='BH')
for (rown in rownames(partial_pearson_corr_result_df)){
for (coln in colnames(partial_pearson_corr_result_df))
bootstrapped_results[[rown]][[coln]]=append(bootstrapped_results[[rown]][[coln]],partial_pearson_corr_result_df[rown,coln])
}
}
saveRDS(bootstrapped_results,"bootstrapped_partial_corr_results.rds")
saveRDS(r_squared_mutation_heart, "bootstrapped_r_squared_mutation_heart.rds")
saveRDS(r_squared_mutation_only, "bootstrapped_r_squared_mutation_only.rds")
```
### Supplementary Figure 16
### Plot the results of linear regression-R^2 bootsrapping analysis
```{r fig.width=2.5, fig.height=1.8}
r_squared_mutation_heart=readRDS("bootstrapped_r_squared_mutation_heart.rds")
r_squared_mutation_only=readRDS("bootstrapped_r_squared_mutation_only.rds")
plot_df <- data.frame(
'Model type' = c( rep("Somatic mutation rate only", 1000), rep("Somatic mutation rate + \nResting heart rate", 1000) ),
value = c( r_squared_mutation_only, r_squared_mutation_heart),check.names = FALSE
)
## Create ggplot
p=ggplot(data=plot_df,aes(x=value, fill=`Model type`,y=..density..)) +
geom_histogram(alpha = 0.5,position="identity") +
geom_density(alpha = 0.25)+
labs(x='adj. R-squared values')+
theme(legend.position=c(0.3,0.8),
axis.title.x=element_text(size=15),
panel.background = element_rect(fill = "white"),
axis.line.y.left = element_line(color = "black"),
axis.line.x.bottom = element_line(color = "black"),
plot.title = element_text(size = 13, face = "bold",hjust = 0.5)) +
ggtitle('Adj. R-squared values of two models \npredicting lifespan (1000 resamples)')
ggsave('life_expectancy_figs/Supplementary Figure 16.png',plot=p,width = 2*2.1, height = 2*1.5,dpi=600)
print(p)
```
### Supplementary Figure 17
### Plot the results of partial correlation bootsrapping analysis
```{r fig.width=3.7, fig.height=2.2}
library(reshape2)
bootstrapped_results=readRDS("bootstrapped_partial_corr_results.rds")
# function to extract legend from plot
get_only_legend <- function(plot) {
plot_table <- ggplot_gtable(ggplot_build(plot))
legend_plot <- which(sapply(plot_table$grobs, function(x) x$name) == "guide-box")
legend <- plot_table$grobs[[legend_plot]]
return(legend)
}
## Variables to compare in partial correlation analysis
vars_to_compare=c('log_resting_heart_rate','log_mass_specific_BMR')
my_cols=qualitative_hcl(length((vars_to_compare)),palette = "Pastel 1")
## List to save the plots
plot_list=list()
for (log_transform_p in (c(TRUE))){
for (i in seq_along(result_names_list)){
correlation_result_metric=result_names_list[i]
plot_df=data.frame()
## Set significance level to draw in the plot as a vertical line
if (grepl('adj', correlation_result_metric,fixed=TRUE)){
sign_level=0.2
} else {sign_level=0.05}
## Create dataframe out of the bootstrapped results of the variables considered for the plot
for (var in vars_to_compare){
plot_df=rbind(plot_df,unlist(bootstrapped_results[[var]][[correlation_result_metric]]))}
## Rename colnames and omit NAN values
plot_df=t(plot_df)
colnames(plot_df)=vars_to_compare
rownames(plot_df)=NULL
plot_df=na.omit(plot_df)
## Melt plot_df to make coloring of the two distributions easier in ggplot
melted_df=melt(plot_df)
colnames(melted_df)=c('Bootstrap_number','variable','value')
## Calculate the -log10 transforms of p-vales and significance levels
if (log_transform_p==TRUE){
melted_df$log_value=-log10(melted_df$value)
x_label = substitute(log[10] * "(p-value)", list(log = paste0(gsub('p-value','',correlation_result_metric),'-log')))
sign_level=-log10(sign_level)
} else {x_label=correlation_result_metric}
## Create ggplot
p=ggplot(data=melted_df,aes(x = log_value, fill = variable,y=..density..)) +
geom_histogram(alpha = 0.5,position="identity") +
geom_density(alpha = 0.25)+
labs(x=(x_label))+
theme(legend.position="none",
axis.title.x=element_text(size=15),
panel.background = element_rect(fill = "white"),
axis.line.y.left = element_line(color = "black"),
axis.line.x.bottom = element_line(color = "black"))+
geom_vline(xintercept = sign_level, color="red",
linetype="dashed", size=1)
## Clean up names of variables
legend_labels=gsub('_',' ',vars_to_compare)
legend_labels=gsub('log','',legend_labels)
legend_labels=tools::toTitleCase(tolower(legend_labels))
legend_labels=gsub('Mass Specific Bmr','Mass-Specific BMR',legend_labels)
## Create a plot only to extract the legend from it that willbe shown on the plot_grid object at the end
plot_for_legend=ggplot(data=melted_df,aes(x = value, fill = variable,y=..density..)) +
geom_histogram(alpha = 0.7,position="identity") +
geom_density(alpha = 0.25)+
theme(legend.background = element_rect(fill = NA))+
scale_fill_manual(name='Trait',labels = legend_labels, values=my_cols)+
theme(legend.text = element_text(size=12),
legend.title = element_text(size=14))+
ggtitle(correlation_result_metric)
## Save created plots and legends in a list
plot_list[correlation_result_metric]=list(p)
combined_legend=get_only_legend(plot_for_legend)
}
## Add the 2 ggplots objects to a plot_grid object + add legend in the second step
p2=plot_grid(plotlist = plot_list[c('Pearson p-value')],ncol = 1) #
p2_with_legend=plot_grid(p2,combined_legend,ncol = 2,rel_widths = c(0.8, 0.3),scale=1.1)
## Create title for the final plot
subplot_title <- ggdraw() + draw_label(paste0('Significance levels of partial correlation with lifespan \nafter correcting for somatic mutation rate (',num_samples,' resamples)'),y=0.7,size = 14,fontface='bold')
## Add title to plot
plots_with_legend_title=plot_grid(subplot_title, p2_with_legend, ncol=1, rel_heights=c(0.2, 0.95),scale = 0.95)
## Save figure as pdf
ggsave2(filename=paste0('life_expectancy_figs/','Supplementary Figure 17','.pdf'),plot=plots_with_legend_title,limitsize = FALSE)
ggsave2(filename=paste0('life_expectancy_figs/','Supplementary Figure 17','.png'),plot=plots_with_legend_title,bg='white',
limitsize = FALSE,dpi=600)
print(plots_with_legend_title)
}
```