Skip to content

Commit

Permalink
simplified the reporting of results for the hidden paralogy check fun…
Browse files Browse the repository at this point in the history
…ction
  • Loading branch information
JLSteenwyk committed Dec 12, 2023
1 parent c534e3f commit 3052b5b
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 67 deletions.
4 changes: 4 additions & 0 deletions change_log.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
Major changes to PhyKIT are summarized here.

1.12.3
- hidden paralogy check now simply looks for monophyly or lack thereof for a set of taxa. Hidden paralogy
check still reports insufficient taxon representation.

1.12.2
- removed root.txt file from DVMC function. Users are now advised to trim outgroup taxa beforehand

Expand Down
16 changes: 5 additions & 11 deletions docs/usage/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -648,24 +648,18 @@ A B C |br|
D E F |br|
"

The output will have six columns and as many rows
as clades were specified in the -c file. For example,
if there were three rows of clades to examine the
monophyly of, there will be three rows in the output
The output will report whether the specified taxa were monophyletic
or not. The number of rows will reflect how many groups of taxa
were checked for monophyly. For example,
if there were three rows of clades in the -c file, there will be
three rows in the output
where the first row in the output corresponds to the
results of the first row in the clade file. |br|
col 1: if the clade was or wasn't monophyletic |br|
col 2: average bipartition support value in the clade of interest |br|
col 3: maximum bipartition support value in the clade of interest |br|
col 4: minimum bipartition support value in the clade of interest |br|
col 5: standard deviation of bipartition support values in the clade of interest |br|
col 6: tip names of the clade specified in the clade file

The concept behind this analysis follows
Siu-Ting et al., Molecular Biology and Evolution (2019),
doi: 10.1093/molbev/msz067.


.. code-block:: shell
phykit hidden_paralogy_check <tree> -c/--clade <clade_file>
Expand Down
17 changes: 5 additions & 12 deletions phykit/phykit.py
Original file line number Diff line number Diff line change
Expand Up @@ -1178,19 +1178,12 @@ def hidden_paralogy_check(argv):
Tip names not present in the tree will not be considered
when assessing hidden paralogy.
The output will have six columns and as many rows
as clades were specified in the -c file. For example,
if there were three rows of clades to examine the
monophyly of, there will be three rows in the output
where the first row in the output corresponds to the
The output will report if the specified taxa were monophyletic
or not. The number of rows will reflect how many groups of taxa
were checked for monophyly. For example, if there were three
rows of clades in the -c file, there will be three rows in the
output where the first row in the output corresponds to the
results of the first row in the clade file.
col 1: if the clade was or wasn't monophyletic
col 2: average bipartition support value in the clade of interest
col 3: maximum bipartition support value in the clade of interest
col 4: minimum bipartition support value in the clade of interest
col 5: standard deviation of bipartition support values in the clade of interest
col 6: tip names of taxa monophyletic with the lineage of interest
excluding those that are listed in the taxa_of_interest file
The concept behind this analysis follows
Siu-Ting et al., Molecular Biology and Evolution (2019).
Expand Down
42 changes: 8 additions & 34 deletions phykit/services/tree/hidden_paralogy_check.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import copy
import statistics as stat
import sys

from .base import Tree
import numpy as np

from ...helpers.stats_summary import calculate_summary_statistics_from_arr
from .base import Tree


class HiddenParalogyCheck(Tree):
Expand Down Expand Up @@ -44,10 +45,8 @@ def run(self):
set(clade_of_interest) ^ set(common_ancestor_tips)
)

stats = self.get_bootstrap_statistics(tree)

res_arr = self.populate_res_arr(
shared_tree_tips, diff_tips_between_clade_and_curr_tree, stats, res_arr
shared_tree_tips, diff_tips_between_clade_and_curr_tree, res_arr
)

self.print_results(res_arr)
Expand All @@ -69,47 +68,22 @@ def read_clades_file(self, clades):

return clades

def get_bootstrap_statistics(self, tree):
    """Summarize the support values stored on a tree's internal nodes.

    Collects the ``confidence`` attribute of every clade returned by
    ``tree.get_nonterminals()``, skipping clades whose confidence is
    ``None``, and hands the resulting list to
    ``calculate_summary_statistics_from_arr``.

    Args:
        tree: object exposing ``get_nonterminals()`` whose items carry a
            ``confidence`` attribute (presumably a Bio.Phylo tree --
            confirm against the caller).

    Returns:
        Whatever ``calculate_summary_statistics_from_arr`` returns for the
        collected support values.
    """
    # Use an identity test rather than ``!= None`` so that a legitimate
    # support value is never dropped by a custom ``__ne__`` and a value
    # of 0 is still kept (only a missing value is skipped).
    bs_vals = [
        clade.confidence
        for clade in tree.get_nonterminals()
        if clade.confidence is not None
    ]
    return calculate_summary_statistics_from_arr(bs_vals)

def populate_res_arr(
self, shared_tree_tips, diff_tips_between_clade_and_curr_tree, stats, res_arr
self, shared_tree_tips, diff_tips_between_clade_and_curr_tree, res_arr
):
temp = []

if len(diff_tips_between_clade_and_curr_tree) == 0:
temp.append("monophyletic")
else:
temp.append("not_monophyletic")
temp.append(stats["mean"])
temp.append(stats["maximum"])
temp.append(stats["minimum"])
temp.append(stats["standard_deviation"])
temp.append(diff_tips_between_clade_and_curr_tree)
res_arr.append(temp)

return res_arr

def print_results(self, res_arr):
for res in res_arr:
try:
if len(res[5]) != 0:
res[5].sort()
print(
f"{res[0]}\t{round(res[1], 4)}\t{round(res[2], 4)}\t{round(res[3], 4)}\t{round(res[4], 4)}\t{';'.join(res[5])}"
)
else:
print(
f"{res[0]}\t{round(res[1], 4)}\t{round(res[2], 4)}\t{round(res[3], 4)}\t{round(res[4], 4)}"
)
except IndexError:
print(f"{res[0]}")
print(
f"{res[0]}"
)
2 changes: 1 addition & 1 deletion phykit/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.12.2"
__version__ = "1.12.3"
18 changes: 9 additions & 9 deletions tests/integration/tree/test_hidden_paralogy_check_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ def test_hidden_paralogy_check(self, mocked_print):
Phykit()

assert mocked_print.mock_calls == [
call("monophyletic\t100\t100\t100\t0.0"),
call("not_monophyletic\t95.7143\t100\t85\t7.3193\tAspergillus_fischeri_NRRL181.GCF_000149645.1_ASM14964v1;Aspergillus_fischeri_NRRL4585;Aspergillus_fumigatus_CEA10;Aspergillus_fumigatus_HMR_AF_270;Aspergillus_fumigatus_Z5;Aspergillus_oerlinghausenensis_CBS139183"),
call("monophyletic"),
call("not_monophyletic"),
call("insufficient_taxon_representation"),
]

Expand All @@ -43,8 +43,8 @@ def test_hidden_paralogy_check_long(self, mocked_print):
Phykit()

assert mocked_print.mock_calls == [
call("monophyletic\t100\t100\t100\t0.0"),
call("not_monophyletic\t95.7143\t100\t85\t7.3193\tAspergillus_fischeri_NRRL181.GCF_000149645.1_ASM14964v1;Aspergillus_fischeri_NRRL4585;Aspergillus_fumigatus_CEA10;Aspergillus_fumigatus_HMR_AF_270;Aspergillus_fumigatus_Z5;Aspergillus_oerlinghausenensis_CBS139183"),
call("monophyletic"),
call("not_monophyletic"),
call("insufficient_taxon_representation"),
]

Expand All @@ -61,8 +61,8 @@ def test_hidden_paralogy_check_alias(self, mocked_print):
Phykit()

assert mocked_print.mock_calls == [
call("monophyletic\t100\t100\t100\t0.0"),
call("not_monophyletic\t95.7143\t100\t85\t7.3193\tAspergillus_fischeri_NRRL181.GCF_000149645.1_ASM14964v1;Aspergillus_fischeri_NRRL4585;Aspergillus_fumigatus_CEA10;Aspergillus_fumigatus_HMR_AF_270;Aspergillus_fumigatus_Z5;Aspergillus_oerlinghausenensis_CBS139183"),
call("monophyletic"),
call("not_monophyletic"),
call("insufficient_taxon_representation"),
]

Expand All @@ -71,7 +71,7 @@ def test_hidden_paralogy_check_file_error0(self, mocked_print):
testargs = [
"phykit",
"clan_check",
f"{here.parent.parent.parent}/sample_files/small_Aspergillus_tree.t",
"file doesn't exist",
"-c",
f"{here.parent.parent.parent}/sample_files/small_Aspergillus_tree.hidden_paralogy_check.txt",
]
Expand All @@ -88,10 +88,10 @@ def test_hidden_paralogy_check_file_error1(self, mocked_print):
"clan_check",
f"{here.parent.parent.parent}/sample_files/small_Aspergillus_tree.tre",
"-c",
f"{here.parent.parent.parent}/sample_files/small_Aspergillus_tree.hidden_paralogy_check.t",
"file doesn't exist",
]
with pytest.raises(SystemExit) as pytest_wrapped_e:
Phykit()

assert pytest_wrapped_e.type == SystemExit
assert pytest_wrapped_e.value.code == 2
assert pytest_wrapped_e.value.code == 2

0 comments on commit 3052b5b

Please sign in to comment.