From 052e579b897ba8adc188a9aca0f91093edf33136 Mon Sep 17 00:00:00 2001 From: flouris Date: Tue, 31 Jan 2017 19:06:20 +0100 Subject: [PATCH] improved ASV score, corrected regexp for scientific notation when parsing branch lengths, fixed memory leaks when parsing incorrectly formatted trees --- ChangeLog.md | 6 ++++++ README.md | 15 +++++++------- configure.ac | 2 +- man/mptp.1 | 10 ++++++++-- src/lex_rtree.l | 51 ++++++++++++++++++++++++++--------------------- src/lex_utree.l | 51 ++++++++++++++++++++++++++--------------------- src/mptp.c | 5 ++++- src/multirun.c | 20 ++++++++++++++++--- src/parse_rtree.y | 3 +++ 9 files changed, 103 insertions(+), 60 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 65921a9..139f47f 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -2,6 +2,12 @@ All notable changes to `mptp` will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/). +## [0.2.2] - 2017-01-31 +### Fixed + - Regular expressions now allow scientific notation when parsing branch lengths + - Improved accuracy of ASV score (takes into account tip species) + - Memory leaks when parsing incorrectly formatted trees + ## [0.2.1] - 2016-10-18 ### Fixed - Updated ASV to consider only coalescent roots of ML delimitation diff --git a/README.md b/README.md index 6faf523..7fb5416 100644 --- a/README.md +++ b/README.md @@ -78,9 +78,9 @@ where `DIR` is the directory where bash autocompletion is stored. You can use and the documentation, use the following commands: ```bash -wget https://github.com/Pas-Kapli/mptp/releases/download/v0.2.1/mptp-src-0.2.1.tar.gz -tar zxvf mptp-src-0.2.1.tar.gz -cd mptp-src-0.2.1 +wget https://github.com/Pas-Kapli/mptp/releases/download/v0.2.2/mptp-src-0.2.2.tar.gz +tar zxvf mptp-src-0.2.2.tar.gz +cd mptp-src-0.2.2 ./configure make make install # as root, or run sudo make install @@ -110,12 +110,12 @@ To use the pre-compiled binary, download the appropriate executable for your system using the following commands if you are using a Linux system: ```bash -wget https://github.com/Pas-Kapli/mptp/releases/download/v0.2.1/mptp-0.2.1-linux-x86_64.tar.gz -tar zxvf mptp-0.2.1-linux-x86_64.tar.gz +wget https://github.com/Pas-Kapli/mptp/releases/download/v0.2.2/mptp-0.2.2-linux-x86_64.tar.gz +tar zxvf mptp-0.2.2-linux-x86_64.tar.gz ``` You will now have the binary distribution in a folder called -`mptp-0.2.1-linux-x86_64` in which you will find three subfolders `bin`, `man` +`mptp-0.2.2-linux-x86_64` in which you will find three subfolders `bin`, `man` and `doc`. We recommend making a copy or a symbolic link to the mptp binary `bin/mptp` in a folder included in your `$PATH`, and a copy or a symbolic link to the mptp man page `man/mptp.1` in a folder included in your `$MANPATH`. The @@ -171,7 +171,8 @@ Visualization options: ## Usage example ```bash -./mptp --ml --multi --tree_file testTree --output_file out --outgroup A,C --tree_show +mptp --ml --multi --tree_file testTree --output_file out --outgroup A,C --tree_show +mptp --mcmc 50000000 --multi --mcmc_sample 1000000 --mcmc_burnin 1000000 --tree_file tree.newick --output_file out ``` ## Documentation diff --git a/configure.ac b/configure.ac index 914923b..7e8fa14 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ # Process this file with autoconf to produce a configure script. AC_PREREQ([2.63]) -AC_INIT([mptp], [0.2.1], [Tomas.Flouri@h-its.org]) +AC_INIT([mptp], [0.2.2], [Tomas.Flouri@h-its.org]) AM_INIT_AUTOMAKE([subdir-objects]) AC_LANG([C]) AC_CONFIG_SRCDIR([src/mptp.c]) diff --git a/man/mptp.1 b/man/mptp.1 index a6900fb..c685b2d 100644 --- a/man/mptp.1 +++ b/man/mptp.1 @@ -1,6 +1,6 @@ .\" -*- coding: utf-8 -*- .\" ============================================================================ -.TH mptp 1 "October 18, 2016" "mptp 0.2.1" "USER COMMANDS" +.TH mptp 1 "January 31, 2017" "mptp 0.2.2" "USER COMMANDS" .\" ============================================================================ .SH NAME mptp \(em single-locus species delimitation @@ -298,7 +298,7 @@ Source code and binaries are available at . .\" ============================================================================ .SH COPYRIGHT -Copyright (C) 2015-2016, Tomas Flouri, Sarah Lutteropp, Paschalia Kapli +Copyright (C) 2015-2017, Tomas Flouri, Sarah Lutteropp, Paschalia Kapli .PP All rights reserved. .PP @@ -345,5 +345,11 @@ sizes. Updated ASV to consider only coalescent roots of ML delimitation. Removed assertion stopping mptp when using random starting delimitations for the MCMC method. +.TP +.BR v0.2.2\~ "released January 31st, 2017" +Fixed regular expressions to allow scientific notation for branch lengths when +parsing trees. Improved the accuracy of ASV score by also taking into account +tips forming coalescent roots. Fixed memory leaks that occur when parsing +incorrectly formatted trees. .RE .LP diff --git a/src/lex_rtree.l b/src/lex_rtree.l index 7815a06..f7c7d2c 100644 --- a/src/lex_rtree.l +++ b/src/lex_rtree.l @@ -47,35 +47,40 @@ static char * append(size_t * dstlen, const char * src, size_t srclen) %% { -\\\" {append(&string_length, "\\\"", 2);} -\' {append(&string_length, "\'", 1);} -\" {BEGIN(INITIAL);return STRING;} +\\\" { append(&string_length, "\\\"", 2); } +\' { append(&string_length, "\'", 1); } +\" { BEGIN(INITIAL); return STRING; } } { -\\\' {append(&string_length, "\\\'", 2);} -\" {append(&string_length, "\"", 1);} -\' {BEGIN(INITIAL);return STRING;} +\\\' { append(&string_length, "\\\'", 2); } +\" { append(&string_length, "\"", 1); } +\' { BEGIN(INITIAL); return STRING; } } { -\\n {append(&string_length, "\\n", 2);} -\\t {append(&string_length, "\\t", 2);} -\\ {append(&string_length, "\\", 1);} -\\\\ {append(&string_length, "\\\\", 2);} -([^\"\'\\]|\n)+ {append(&string_length, rtree_text, rtree_leng);} +\\n { append(&string_length, "\\n", 2); } +\\t { append(&string_length, "\\t", 2); } +\\ { append(&string_length, "\\", 1); } +\\\\ { append(&string_length, "\\\\", 2); } +([^\"\'\\]|\n)+ { append(&string_length, rtree_text, rtree_leng); } } -\: return COLON; -\; return SEMICOLON; -\) return CPAR; -\( return OPAR; -\, return COMMA; -\" {string_length = 0; BEGIN(quot);} -\' {string_length = 0; BEGIN(apos);} -[\+|\-]?[0-9]+ {rtree_lval.d = xstrndup(rtree_text, rtree_leng); return NUMBER;} -[\+|\-]?[0-9]+\.[0-9]+([e|E][\+|\-]?[0-9]+)? {rtree_lval.d = xstrndup(rtree_text, rtree_leng); return NUMBER;} -[^ \'\",\(\):;\[\]\t\n\r][^ \t\n\r\)\(\[\]\,:;]* {rtree_lval.s = xstrndup(rtree_text, rtree_leng); return STRING;} -[ \t\n\r] ; -. {fatal("Syntax error (%c)\n", rtree_text[0]);} +\: { return COLON; } +\; { return SEMICOLON; } +\) { return CPAR; } +\( { return OPAR; } +\, { return COMMA; } +\" { string_length = 0; BEGIN(quot); } +\' { string_length = 0; BEGIN(apos); } +[-+]?[0-9]+ { rtree_lval.d = xstrndup(rtree_text,rtree_leng); + return NUMBER; } +[+-]?(([0-9]+[\.]?[0-9]*)|([0-9]*[\.]?[0-9]+))([eE][+-]?[0-9]+)? { + rtree_lval.d = xstrndup(rtree_text,rtree_leng); + return NUMBER; } +[^ \'\",\(\):;\[\]\t\n\r][^ \t\n\r\)\(\[\]\,:;]* { + rtree_lval.s = xstrndup(rtree_text,rtree_leng); + return STRING; } +[ \t\n\r] { ; } +. { fatal("Syntax error (%c)\n", rtree_text[0]); } %% diff --git a/src/lex_utree.l b/src/lex_utree.l index 9942e79..50af97f 100644 --- a/src/lex_utree.l +++ b/src/lex_utree.l @@ -47,35 +47,40 @@ static char * append(size_t * dstlen, const char * src, size_t srclen) %% { -\\\" {append(&string_length, "\\\"", 2);} -\' {append(&string_length, "\'", 1);} -\" {BEGIN(INITIAL);return STRING;} +\\\" { append(&string_length, "\\\"", 2); } +\' { append(&string_length, "\'", 1); } +\" { BEGIN(INITIAL); return STRING; } } { -\\\' {append(&string_length, "\\\'", 2);} -\" {append(&string_length, "\"", 1);} -\' {BEGIN(INITIAL);return STRING;} +\\\' { append(&string_length, "\\\'", 2); } +\" { append(&string_length, "\"", 1); } +\' { BEGIN(INITIAL);return STRING;} } { -\\n {append(&string_length, "\\n", 2);} -\\t {append(&string_length, "\\t", 2);} -\\ {append(&string_length, "\\", 1);} -\\\\ {append(&string_length, "\\\\", 2);} -([^\"\'\\]|\n)+ {append(&string_length, utree_text, utree_leng);} +\\n { append(&string_length, "\\n", 2); } +\\t { append(&string_length, "\\t", 2); } +\\ { append(&string_length, "\\", 1); } +\\\\ { append(&string_length, "\\\\", 2); } +([^\"\'\\]|\n)+ { append(&string_length, utree_text, utree_leng); } } -\: return COLON; -\; return SEMICOLON; -\) return CPAR; -\( return OPAR; -\, return COMMA; -\" {string_length = 0; BEGIN(quot);} -\' {string_length = 0; BEGIN(apos);} -[\+|\-]?[0-9]+ {utree_lval.d = xstrndup(utree_text, utree_leng); return NUMBER;} -[\+|\-]?[0-9]+\.[0-9]+([e|E][\+|\-]?[0-9]+)? {utree_lval.d = xstrndup(utree_text, utree_leng); return NUMBER;} -[^ \'\",\(\):;\[\]\t\n\r][^ \t\n\r\)\(\[\]\,:;]* {utree_lval.s = xstrndup(utree_text, utree_leng); return STRING;} -[ \t\n\r] ; -. {fatal("Syntax error (%c)\n", utree_text[0]);} +\: { return COLON; } +\; { return SEMICOLON; } +\) { return CPAR; } +\( { return OPAR; } +\, { return COMMA; } +\" { string_length = 0; BEGIN(quot); } +\' { string_length = 0; BEGIN(apos); } +[-+]?[0-9]+ { utree_lval.d = xstrndup(utree_text,utree_leng); + return NUMBER; } +[+-]?(([0-9]+[\.]?[0-9]*)|([0-9]*[\.]?[0-9]+))([eE][+-]?[0-9]+)? { + utree_lval.d = xstrndup(utree_text,utree_leng); + return NUMBER; } +[^ \'\",\(\):;\[\]\t\n\r][^ \t\n\r\)\(\[\]\,:;]* { + utree_lval.s = xstrndup(utree_text,utree_leng); + return STRING; } +[ \t\n\r] { ; } +. { fatal("Syntax error (%c)\n", utree_text[0]); } %% diff --git a/src/mptp.c b/src/mptp.c index 4306437..0a15718 100644 --- a/src/mptp.c +++ b/src/mptp.c @@ -380,6 +380,9 @@ void cmd_help() "Usage: %s [OPTIONS]\n", progname); fprintf(stderr, "\n" + "Examples:\n" + " mptp --ml --multi --tree_file tree.newick --output_file output\n" + " mptp --mcmc 50000000 --multi --mcmc_sample 1000000 --mcmc_burnin 1000000 --tree_file tree.newick --output_file output\n\n" "General options:\n" " --help display help information.\n" " --version display version information.\n" @@ -436,7 +439,7 @@ static rtree_t * load_tree(void) unsigned int tip_count; utree_t * utree = utree_parse_newick(opt_treefile, &tip_count); if (!utree) - fatal("Tree is neither unrooted nor rooted. Go fix your tree."); + fatal("Tree is neither unrooted nor rooted."); if (!opt_quiet) { diff --git a/src/multirun.c b/src/multirun.c index 9cfd5e0..1a53016 100644 --- a/src/multirun.c +++ b/src/multirun.c @@ -21,6 +21,9 @@ #include "mptp.h" +#define MPTP_INNER_CROOT 1 +#define MPTP_TIP_CROOT 2 + static double asv(int * mlcroots, double * support, int count) { int i; @@ -29,11 +32,16 @@ static double asv(int * mlcroots, double * support, int count) for (i = 0; i < count; ++i) { - if (mlcroots[i] == 1) + if (mlcroots[i] == MPTP_INNER_CROOT) { sum += (1-support[i]); croots_count++; } + else if (mlcroots[i] == MPTP_TIP_CROOT) + { + sum += support[i]; + croots_count++; + } } return sum / croots_count; @@ -51,14 +59,20 @@ static void extract_croots_recursive(rtree_t * node, if (node->event == EVENT_COALESCENT && node->parent->event == EVENT_SPECIATION) { - outbuffer[*index] = 1; + outbuffer[*index] = MPTP_INNER_CROOT; } + else + { + if ((node->event == EVENT_SPECIATION) && (node->left->edge_count == 0 || node->right->edge_count == 0)) + outbuffer[*index] = MPTP_TIP_CROOT; + } + } else { outbuffer[*index] = 0; if (node->event == EVENT_COALESCENT) - outbuffer[*index] = 1; + outbuffer[*index] = MPTP_INNER_CROOT; } *index = *index+1; diff --git a/src/parse_rtree.y b/src/parse_rtree.y index 3215ef2..82206ee 100644 --- a/src/parse_rtree.y +++ b/src/parse_rtree.y @@ -56,6 +56,9 @@ static void rtree_error(rtree_t * tree, const char * s) %error-verbose %parse-param {struct rtree_s * tree} %destructor { rtree_destroy($$); } subtree +%destructor { free($$); } STRING +%destructor { free($$); } NUMBER +%destructor { free($$); } label %token OPAR %token CPAR