Skip to content

Commit

Permalink
test topology of original and bootstrap trees when calculating confid…
Browse files Browse the repository at this point in the history
…ence intervals
  • Loading branch information
Thu-Hien To authored and Thu-Hien To committed Aug 21, 2020
1 parent c16ac80 commit 2e0e433
Show file tree
Hide file tree
Showing 11 changed files with 56 additions and 15 deletions.
3 changes: 2 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ Something new lsd2:
- Use variance in estimating the root position.
- Can use variances twice where the second time uses the branches lengths of the estimated tree to calculate variances.
- Accept date format year-month-date
- Compute confidence intervals using bootstrap trees
- Compute confidence intervals using bootstrap trees

Binary file modified bin/lsd2_mac
Binary file not shown.
Binary file modified bin/lsd2_unix
Binary file not shown.
11 changes: 9 additions & 2 deletions src/confidence_interval.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,9 @@ void computeIC_bootstraps(InputOutputStream *io, Pr* pr,Node** nodes,double* &T_
for (int i=0; i<= pr->nbBranches; i++){
minblen[i] = nodes[i]->minblen;
}
int original_nbInodes = pr->nbINodes;
int original_nbBranches = pr->nbBranches;
double original_objective = pr->objective;
int s = 0;
if (io->inOutgroup){
extrait_outgroup(io, pr, true);
Expand Down Expand Up @@ -219,6 +222,9 @@ void computeIC_bootstraps(InputOutputStream *io, Pr* pr,Node** nodes,double* &T_
if (pr->splitExternal) splitExternalBranches(pr,nodes_bootstrap);
initConstraint(pr, nodes_bootstrap);
if (pr->e>0) remove_outlier_nodes(pr,nodes_bootstrap);
if ((original_nbInodes != pr->nbINodes) || (original_nbBranches != pr->nbBranches) || !checkTopology(pr,nodes,nodes_bootstrap)){
myExit("The bootstrap tree %d does not have the same topology as the original tree\n",y+1);
}
if (!pr->constraint){//LD without constraints
if (pr->estimate_root==""){
without_constraint_multirates(pr,nodes_bootstrap,true);
Expand Down Expand Up @@ -257,6 +263,7 @@ void computeIC_bootstraps(InputOutputStream *io, Pr* pr,Node** nodes,double* &T_
for (int i=0;i<=pr->nbBranches;i++) delete nodes_bootstrap[i];
delete[] nodes_bootstrap;
}
pr->objective = original_objective;
sort(rho_bootstrap,pr->nbBootstrap);
rho_left=rho_bootstrap[int(0.025*pr->nbBootstrap)];
rho_right=rho_bootstrap[pr->nbSampling-int(0.025*pr->nbBootstrap)-1];
Expand Down Expand Up @@ -318,14 +325,14 @@ void computeIC_bootstraps(InputOutputStream *io, Pr* pr,Node** nodes,double* &T_
}

void output(double br,int y, InputOutputStream *io, Pr* pr,Node** nodes,ostream& f,ostream& tree1,ostream& tree2,ostream& tree3,int r){
if (pr->outlier.size()>0){
/*if (pr->outlier.size()>0){
std::ostringstream oss;
oss<<"- There are "<<pr->outlier.size()<<" nodes that are considered as outliers and were excluded from the analysis:\n";
for (int i=0;i<pr->outlier.size();i++){
oss<<" "<<nodes[pr->outlier[i]]->L<<", suggesting date "<<nodes[pr->outlier[i]]->D<<"\n";
}
pr->resultMessage.push_back(oss.str());
}
}*/
if (!pr->constraint && pr->ci) {
std::ostringstream oss;
oss<<"- Confidence intervals are not warranted under non-constraint mode.\n";
Expand Down
1 change: 0 additions & 1 deletion src/dating.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -887,7 +887,6 @@ bool with_constraint_active_set(Pr* pr,Node** &nodes){
for (int i=0;i<=pr->nbBranches;i++) nodes[i]->D=D_old[i];
}
computeObjective(pr,nodes);

delete[] D_old;
delete[] dir;
return val;
Expand Down
28 changes: 24 additions & 4 deletions src/estimate_root.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,18 @@ bool without_constraint_lambda(double br,Pr* &par,Node** &nodes,list<int> active
int r=(*iter);//r=r1
iter++;
int pr=(*iter);//pr=r2
if (r >= par->nbINodes && nodes[r]->type == 'n') return false;
if (pr >= par->nbINodes && nodes[pr]->type == 'n') return false;
if (r >= par->nbINodes && nodes[r]->type == 'n'){
if (par->estimate_root == "k"){
myExit("Either provide dates for your outgroups or remove them with option -G\n");
}
return false;
}
if (pr >= par->nbINodes && nodes[pr]->type == 'n'){
if (par->estimate_root == "k"){
myExit("Either provide dates for your outgroups or remove them with option -G\n");
}
return false;
}
int l=0;
for (int i=par->nbINodes;i<=par->nbBranches;i++){
if (leaf(nodes[i])) l++;
Expand Down Expand Up @@ -428,8 +438,18 @@ bool with_constraint_lambda(double br,Pr* &pr,Node** &nodes,list<int> active_set
int r=(*iter);//r=r1
iter++;
int p_r=(*iter);//pr=r2
if (r >= pr->nbINodes && nodes[r]->type == 'n') return false;
if (p_r >= pr->nbINodes && nodes[p_r]->type == 'n') return false;
if (r >= pr->nbINodes && nodes[r]->type == 'n'){
if (pr->estimate_root == "k"){
myExit("Either provide dates for your outgroups or remove them with option -G\n");
}
return false;
}
if (p_r >= pr->nbINodes && nodes[p_r]->type == 'n'){
if (pr->estimate_root == "k"){
myExit("Either provide dates for your outgroups or remove them with option -G\n");
}
return false;
}
if (br==0) {
nodes[0]->B=0;
nodes[p_r]->B=0;
Expand Down
7 changes: 4 additions & 3 deletions src/options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Pr* getOptions( int argc, char** argv )

Pr* getCommandLine( int argc, char** argv)
{
const string VERSION="v.1.8.7";
const string VERSION="v.1.8.8";
Pr* opt = new Pr();
int c;
string s;
Expand Down Expand Up @@ -356,7 +356,7 @@ Pr* getInterface()

void printInterface(ostream& in, Pr* opt)
{
const string VERSION = "v.1.8.7";
const string VERSION = "v.1.8.8";

in<<"\nLEAST-SQUARE METHODS TO ESTIMATE RATES AND DATES - "<<VERSION<<" \n\n";
in<<"\nInput files:\n";
Expand Down Expand Up @@ -513,7 +513,7 @@ void printHelp( void )
const string BOLD = "\033[00;01m";
const string LINE = "\033[00;04m";
const string FLAT = "\033[00;00m";
const string VERSION = "v.1.8.7";
const string VERSION = "v.1.8.8";

cout<<BOLD<<"LSD: LEAST-SQUARES METHODS TO ESTIMATE RATES AND DATES - "<<VERSION<<"\n\n";
cout<<BOLD<<"DESCRIPTION\n"
Expand Down Expand Up @@ -567,6 +567,7 @@ void printHelp( void )
<<FLAT<<"\t" <<BOLD<<"-f " <<LINE<<"samplingNumberCI or bootstrapTreeFile\n"
<<FLAT<<"\t This option calculates the confidence intervals of the estimated rate and dates. If the bootstrap trees file is specified, then dating is \n"
<<FLAT<<"\t processed on each of these trees, and the 2.5th and 97.5th percentile of calculated rates and dates are reported as confidence intervals.\n"
<<FLAT<<"\t Note that the bootstraps trees must have the exact topology as the original tree.\n"
<<FLAT<<"\t If there's no bootstrap trees, then we simulate a set of trees. In this case, the number of simulated trees should be specified. Those \n"
<<FLAT<<"\t trees have the same topology as the original one, and their branch lengths are generated as follow:\n"
<<FLAT<<"\t b_i = Poisson(B_i*seqLen)*lognormal(1,q)\n"
Expand Down
3 changes: 1 addition & 2 deletions src/outliers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ bool calculateOutliers(Pr* & pr,Node** & nodes,double & median_rate){
}
if (pr->outlier.size()>0){
std::ostringstream oss;
oss<<"- The input dates associated with the following "<<pr->outlier.size()<<" nodes are considered as\n outliers, so the nodes were removed from the analysis:\n";
oss<<"- The input dates associated with the following "<<pr->outlier.size()<<" nodes are considered as\n outliers, so those nodes were removed from the analysis:\n";
for (int i=0;i<pr->outlier.size();i++){
//if (pr->outlier[i] >= pr->nbINodes){
oss<<" "<<(nodes[pr->outlier[i]]->L).c_str();
Expand Down Expand Up @@ -264,7 +264,6 @@ void shift_node_id(Pr* &pr,Node** &nodes,int* &tab){
}
Pr* prReduced = new Pr(pr->nbINodes-inodes_shift,pr->nbBranches-shift);
prReduced->copy(pr);
prReduced->outlier.clear();

Node** nodesReduced = new Node*[prReduced->nbBranches+1];
nodesReduced[0]=new Node();
Expand Down
2 changes: 2 additions & 0 deletions src/pr.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ typedef struct Pr
if (fnOutgroup != "" ) rooted = true;
warningMessage.clear();
resultMessage.clear();
outlier.clear();
}
void copy(Pr* pr){
inFile = pr->inFile;
Expand All @@ -83,6 +84,7 @@ typedef struct Pr
outFile = pr->outFile;
treeFile1 = pr->treeFile1;
treeFile2 = pr->treeFile2;
outlier = pr->outlier;
mrca=pr->mrca;
MRCA=pr->MRCA;
leaves=pr->leaves;
Expand Down
14 changes: 12 additions & 2 deletions src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2333,9 +2333,9 @@ void initialize_status(Pr* &pr,Node** &nodes){
if (nodes[i]->type=='p') nodes[i]->status=8;
else nodes[i]->status=0;
}
for (int i=0;i<pr->outlier.size();i++){
/*for (int i=0;i<pr->outlier.size();i++){
nodes[pr->outlier[i]]->status=0;
}
}*/
}

list<int> getActiveSet(Pr* pr,Node** nodes){
Expand Down Expand Up @@ -3156,3 +3156,13 @@ void collapseUnInformativeBranches(Pr* &pr,Node** &nodes){
pr = prReduced;
nodes = nodesReduced;
}

bool checkTopology(Pr* pr,Node** nodes1, Node** nodes2){
for (int i=pr->nbINodes;i<=pr->nbBranches;i++){
if (nodes1[i]->L != nodes2[i]->L) return false;
}
for (int i=1;i<=pr->nbBranches;i++){
if (nodes1[i]->P != nodes2[i]->P) return false;
}
return true;
}
2 changes: 2 additions & 0 deletions src/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -314,4 +314,6 @@ void adjustDateToYMD(Date*& date,int m1,int d1,int m2,int d2);

void adjustDateToYM(Date*& date,int m1,int d1,int m2,int d2);

bool checkTopology(Pr* pr,Node** nodes1, Node** nodes2);

#endif

0 comments on commit 2e0e433

Please sign in to comment.