From 095abdfe5c98aceee04c3c80face3bb90cf60c91 Mon Sep 17 00:00:00 2001 From: JLSteenwyk Date: Sun, 15 Sep 2024 07:52:13 -0700 Subject: [PATCH] updated root tree function --- phykit/services/tree/rf_distance.py | 36 ++++++++++++------- phykit/services/tree/root_tree.py | 9 +++-- .../tree/test_rf_distance_integration.py | 8 ++--- tests/integration/tree/test_root_tree.py | 8 ++--- 4 files changed, 33 insertions(+), 28 deletions(-) diff --git a/phykit/services/tree/rf_distance.py b/phykit/services/tree/rf_distance.py index e83a38e..55a1e92 100644 --- a/phykit/services/tree/rf_distance.py +++ b/phykit/services/tree/rf_distance.py @@ -1,3 +1,7 @@ +from typing import Dict, List + +from Bio.Phylo import Newick + from .base import Tree @@ -9,18 +13,17 @@ def run(self): tree_zero = self.read_tree_file() tree_one = self.read_tree1_file() - # get tree tip names + # get shared tree tip names tree_zero_tips = self.get_tip_names_from_tree(tree_zero) tree_one_tips = self.get_tip_names_from_tree(tree_one) - - # determine shared tips, tips to prune, and prune tips shared_tree_tips = self.shared_tips(tree_zero_tips, tree_one_tips) + + # prune to common set tree_zero_tips_to_prune = list(set(tree_zero_tips) - set(shared_tree_tips)) tree_one_tips_to_prune = list(set(tree_one_tips) - set(shared_tree_tips)) tree_zero = self.prune_tree_using_taxa_list(tree_zero, tree_zero_tips_to_prune) tree_one = self.prune_tree_using_taxa_list(tree_one, tree_one_tips_to_prune) - tip_for_rooting = "" for term in tree_zero.get_terminals(): tip_for_rooting = term.name break @@ -33,22 +36,29 @@ def run(self): print(f"{plain_rf}\t{round(normalized_rf, 4)}") - def process_args(self, args): - return dict(tree_file_path=args.tree_zero, tree1_file_path=args.tree_one) + def process_args(self, args) -> Dict[str, str]: + return dict( + tree_file_path=args.tree_zero, + tree1_file_path=args.tree_one, + ) def calculate_robinson_foulds_distance(self, tree_zero, tree_one): plain_rf = 0 plain_rf = self.compare_trees(plain_rf, tree_zero, tree_one) plain_rf = self.compare_trees(plain_rf, tree_one, tree_zero) - # count the number of tips in a phylogeny + tip_count = tree_zero.count_terminals() - # calculate normalized rf distance normalized_rf = plain_rf / (2 * (tip_count - 3)) return plain_rf, normalized_rf - def compare_trees(self, plain_rf: int, tree_zero: Tree, tree_one: Tree) -> int: + def compare_trees( + self, + plain_rf: int, + tree_zero: Newick.Tree, + tree_one: Newick.Tree + ) -> int: # loop through tree_zero and find similar clade in tree_one for clade_zero in tree_zero.get_nonterminals()[1:]: # initialize and populate a list of tip names in tree_zero @@ -65,11 +75,11 @@ def compare_trees(self, plain_rf: int, tree_zero: Tree, tree_one: Tree) -> int: return plain_rf def determine_if_clade_differs( - self, plain_rf: int, tip_names_zero: list, tip_names_one: list + self, + plain_rf: int, + tip_names_zero: List[str], + tip_names_one: List[str], ) -> int: - """ - if clade differs, add 1 to plain_rf value - """ if set(tip_names_zero) != set(tip_names_one): plain_rf += 1 diff --git a/phykit/services/tree/root_tree.py b/phykit/services/tree/root_tree.py index 45dea64..885c0b8 100644 --- a/phykit/services/tree/root_tree.py +++ b/phykit/services/tree/root_tree.py @@ -12,7 +12,8 @@ def __init__(self, args) -> None: def run(self): tree = self.read_tree_file() - outgroup = read_single_column_file_to_list(self.outgroup_taxa_file_path) + outgroup = \ + read_single_column_file_to_list(self.outgroup_taxa_file_path) Phylo.BaseTree.Tree.root_with_outgroup(tree, outgroup) @@ -21,10 +22,8 @@ def run(self): def process_args(self, args): tree_file_path = args.tree - if args.output is None: - output_file_path = f"{tree_file_path}.rooted" - else: - output_file_path = f"{args.output}" + output_file_path = \ + args.output if args.output else f"{tree_file_path}.rooted" return dict( tree_file_path=tree_file_path, diff --git a/tests/integration/tree/test_rf_distance_integration.py b/tests/integration/tree/test_rf_distance_integration.py index d6373ee..6923a17 100644 --- a/tests/integration/tree/test_rf_distance_integration.py +++ b/tests/integration/tree/test_rf_distance_integration.py @@ -1,9 +1,7 @@ -import pytest -import sys -from math import isclose from mock import patch, call from pathlib import Path -from textwrap import dedent +import pytest +import sys from phykit.phykit import Phykit @@ -117,4 +115,4 @@ def test_rf_distance_incorrect_tree1_path(self, mocked_print): Phykit() assert pytest_wrapped_e.type == SystemExit - assert pytest_wrapped_e.value.code == 2 \ No newline at end of file + assert pytest_wrapped_e.value.code == 2 diff --git a/tests/integration/tree/test_root_tree.py b/tests/integration/tree/test_root_tree.py index 1dd4399..3ae01bb 100644 --- a/tests/integration/tree/test_root_tree.py +++ b/tests/integration/tree/test_root_tree.py @@ -1,9 +1,7 @@ -import pytest -import sys -from math import isclose from mock import patch, call from pathlib import Path -from textwrap import dedent +import pytest +import sys from phykit.phykit import Phykit @@ -131,4 +129,4 @@ def test_root_tree_custom_output(self, mocked_print): with open(f"{here.parent.parent.parent}/sample_files/tree_simple_rooted_custom_out.tre", "r") as out_tree: out_tree_content = out_tree.read() - assert expected_tree_content == out_tree_content \ No newline at end of file + assert expected_tree_content == out_tree_content