-
Notifications
You must be signed in to change notification settings - Fork 0
/
thresholds.R
80 lines (62 loc) · 2.5 KB
/
thresholds.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
######################################################################
### Allen Institute Cell Lineage Reconstruction DREAM Challenge
### Renata Retkute, 2020
######################################################################
library(phytools)
library(gbm)
library(ggplot2)
library(treeman)
require(ggplot2)
theme_set(theme_bw())
library(RColorBrewer)
require(gridExtra)
library(cowplot)
library(ggpubr)
source("train.R")
source("thresholds_func.R")
# Thresholds of clustering for each layer in phylogenin tree
thresholds<-c(0.3, 0.1, 0.05, 0.01, 0.005)
n.trees<-100
# File with ground trouth (columns with IDs and nw)
train.set<-"train_setDREAM2019.txt"
training_data <- read.table(train.set, header=T, colClasses = "character")
# Directory with training data
train.dir<-"train_data"
# Directory with testing data
test.dir<-"test_data"
######################################################################
# Fitted model
######################################################################
ls.tr<-list.files(train.dir)
n.train<-length(ls.tr)
i<-1
xx <- read.table(paste0(train.dir, "/sub1_train_",i,".txt"), header=T, colClasses="character")
lng<- make.lineage(xx, fit.model, thresholds, n.trees)
observed<-training_data$ground[i]
par(mfrow = c(2, 1))
plot(read.newick(text = lng), main=paste0('Test: ',i))
plot(read.newick(text = observed))
tree_1<-readTree(text=paste0(observed))
tree_2<-readTree(text = lng)
score<- calcDstRF(tree_1, tree_2, nrmlsd = T)
triplets<-calcDstTrp(tree_1, tree_2, nrmlsd = T, parallel = F, progress="text")
c(score, triplets)
ans<-data.frame(set=c(), score=c(), triplets=c())
for(i in 1:n.train){
xx <- read.table(paste0(train.dir, "/sub1_train_",i,".txt"), header=T, colClasses="character")
lng<- make.lineage(xx, fit.model, thresholds, n.trees)
observed<-training_data$ground[i]
par(mfrow = c(2, 1))
plot(read.newick(text = lng), main=paste0('Test: ',i))
plot(read.newick(text = observed))
tree_1<-readTree(text=paste0(observed))
tree_2<-readTree(text = lng)
score<- calcDstRF(tree_1, tree_2, nrmlsd = T)
if(is.na(score)) score<-1
triplets<-calcDstTrp(tree_1, tree_2, nrmlsd = T, parallel = F, progress="text")
if(is.na(triplets)) triplets<-1
cat(c(i, "", score, "", triplets, "\n"))
ans<-rbind(ans, data.frame( set=i, score=score, triplets=triplets))
}
plot(ans$score, ans$triplets, xlab='Robinson-Foulds distance', ylab='Triplet distance')
c(mean(ans$score), mean(ans$triplets))