From 23de9376b4f93262b6ea6370c2fbe3478fafd994 Mon Sep 17 00:00:00 2001 From: JLSteenwyk Date: Fri, 13 Sep 2024 11:54:59 -0700 Subject: [PATCH] improved type hints for lb score function --- phykit/services/tree/lb_score.py | 57 ++++++++++--------- .../tree/test_lb_score_integration.py | 2 +- 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/phykit/services/tree/lb_score.py b/phykit/services/tree/lb_score.py index 86602bb..4497845 100644 --- a/phykit/services/tree/lb_score.py +++ b/phykit/services/tree/lb_score.py @@ -1,5 +1,8 @@ import sys import itertools +from typing import Dict, List, Tuple + +from Bio.Phylo import Newick from .base import Tree @@ -26,28 +29,28 @@ def run(self): stats = calculate_summary_statistics_from_arr(LBis) print_summary_statistics(stats) - def process_args(self, args): + def process_args(self, args) -> Dict[str, str]: return dict(tree_file_path=args.tree, verbose=args.verbose) - def calculate_average_distance_between_tips(self, tips: list, tree) -> float: - # determine pairwise combinations of tips - combos = list(itertools.combinations(tips, 2)) + def calculate_average_distance_between_tips( + self, + tips: List[str], + tree: Newick.Tree, + ) -> float: + total_dist = sum( + tree.distance(tip1, tip2) + for tip1, tip2 in itertools.combinations(tips, 2) + ) - # determine average distance between tips - # avg_dist is PDa - total_dist = float() - for combo in combos: - total_dist += tree.distance(combo[0], combo[1]) + num_combos = len(tips) * (len(tips) - 1) // 2 - return total_dist / len(combos) + return total_dist / num_combos if num_combos else 0 def calculate_average_distance_of_taxon_to_other_taxa( - self, tips: list, tree - ) -> list: - """ - calculate average distance of taxon to all other taxon or average PDi. - Save results to avg PDis list - """ + self, + tips: List[str], + tree: Newick.Tree, + ) -> List[float]: avg_PDis = [] for tip in tips: tips_minus_i = list(set(tips) - set(tip)) @@ -59,10 +62,11 @@ def calculate_average_distance_of_taxon_to_other_taxa( return avg_PDis - def calculate_lb_score_per_taxa(self, avg_PDis: list, avg_dist: float) -> list: - """ - create a list with the lb scores for each taxon - """ + def calculate_lb_score_per_taxa( + self, + avg_PDis: List[float], + avg_dist: float + ) -> List[float]: LBis = [] for PDi in avg_PDis: try: @@ -76,18 +80,17 @@ def calculate_lb_score_per_taxa(self, avg_PDis: list, avg_dist: float) -> list: return LBis - def calculate_lb_score(self, tree): - # get tree tips + def calculate_lb_score( + self, + tree: Newick.Tree + ) -> Tuple[List[str], List[float]]: tips = self.get_tip_names_from_tree(tree) - # get average distance between tips avg_dist = self.calculate_average_distance_between_tips(tips, tree) - # calculate average distance of taxon i to all other taxa - # or PDi and save each result to LBi - avg_PDis = self.calculate_average_distance_of_taxon_to_other_taxa(tips, tree) + avg_PDis = \ + self.calculate_average_distance_of_taxon_to_other_taxa(tips, tree) - # use PDis and avgDist to calculate LB values for each taxon LBis = self.calculate_lb_score_per_taxa(avg_PDis, avg_dist) return tips, LBis diff --git a/tests/integration/tree/test_lb_score_integration.py b/tests/integration/tree/test_lb_score_integration.py index 8e0f2d6..cc9e674 100644 --- a/tests/integration/tree/test_lb_score_integration.py +++ b/tests/integration/tree/test_lb_score_integration.py @@ -166,4 +166,4 @@ def test_lb_score_zero_division_error(self, mocked_print): assert mocked_print.mock_calls == [ call("Invalid tree. Tree should contain branch lengths"), - ] \ No newline at end of file + ]