diff --git a/docs/quadripartitions.ipynb b/docs/quadripartitions.ipynb new file mode 100644 index 00000000..e972f619 --- /dev/null +++ b/docs/quadripartitions.ipynb @@ -0,0 +1,249 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Quadripartition methods \n", + "\n", + "\n", + "The `iter_quadripartitions()` function in the `.enum` subpackage returns all possible quadripartitions of a tree. Quadripartitions are defined by an internal `focal edge`, from which the tree is split into four partitions. Each partition stems from the children of the 2 nodes on either side of the `focal edge`. The quadripartitions are yielded in Node idxorder traversal in a nested format: a tuple of two tuples of two sets, e.g. `(({e0},{e1}), ({e2},{e3}))`, with the contents representing the tip Nodes descending from each of the four partition stems. In this example, `e0` through `e3` are the partition stems and are each children of the nodes being split along the `focal edge`. The partitions of a particular quadripartition are ordered as (child-left, child-right, sister, up) in relation to the Node directly below the `focal edge`.\n", + "\n", + "Note: Sets are used by default, which means when there are multiple nodes in a partition, they will not be sorted. This can be modified by using an ordered datatype like `type=tuple` or `type=list`. This will sort the nodes in alphabetical or index order (depending on whether `feature=\"name\"` or `feature=\"idx\"` is used).\n", + "\n", + "### Simple example\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(({'a'}, {'b'}), ({'d', 'c'}, {'e'}))\n", + "(({'c'}, {'d'}), ({'e'}, {'b', 'a'}))\n" + ] + }, + { + "data": { + "text/html": [ + "
XYZRabcde
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import toytree \n", + "\n", + "#define tree with simple Newick string\n", + "newick = \"((a,b)X,((c,d)Y,e)Z)R;\" \n", + "tree = toytree.tree(newick) \n", + "\n", + "tree.draw('r'); #draw tree in R-style\n", + "\n", + "#iteratively return all quadripartitions in phylogenetic tree\n", + "for q in toytree.enum.iter_quadripartitions(tree):\n", + " print(q)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this example, the only internal edges available to make quadripartitions from are the edges directly above `Y` and `Z` (or `X`, but it has the same result as `Z`). These two quadripartitions are given in tuples of tuples of sets to denote the first (tuple) and second (sets) bipartitions that create the quadripartition." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sorting and formatting \n", + "\n", + "\n", + "When `type=` is set to an ordered datatype (i.e. tuple, list), `iter_quadripartitions` will automatically sort the values within each partition. The user can choose to sort the quadripartition further, ordering each partition within its bipartitions and each bipartition within its quadripartition, by using `sort=True`. This first orders them in size order (small to large), and if the sizes are equal, then by the lowest value Node present." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(((2,), (3,)), ((4, 5, 7), (0, 1))), (((4,), (5,)), ((2, 3, 6), (0, 1))), (((2, 3, 6), (4, 5, 7)), ((0,), (1,)))]\n", + "[(((2,), (3,)), ((0, 1), (4, 5, 7))), (((4,), (5,)), ((0, 1), (2, 3, 6))), (((0,), (1,)), ((2, 3, 6), (4, 5, 7)))]\n" + ] + }, + { + "data": { + "text/html": [ + "
abcdef
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#build tree from simple newick string and visualize it\n", + "tree = toytree.tree(\"(a,b,((c,d)CD,(e,f)EF)X)AB;\") \n", + "tree.draw()\n", + "\n", + "#return quadripartitions as list, with each partition returned in a tuple (instead of a set), including the internal nodes of each partition\n", + "unordered = list(tree.enum.iter_quadripartitions(type=tuple, \n", + " include_internal_nodes=True, \n", + " feature=\"idx\"))\n", + "ordered = list(tree.enum.iter_quadripartitions(type=tuple, \n", + " include_internal_nodes=True, \n", + " feature=\"idx\", \n", + " sort=True))\n", + "print(unordered)\n", + "print(ordered)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The user can also choose to return more or less information about each quadripartition, ranging from all tips and internal nodes using `include_internal_nodes=True`, to only the stems of each partition (`contract_partitions=True`). To specify what information is returned, use the `feature=` argument with any available Node feature. \n", + "\n", + "### Example \n", + "\n", + "#### Lots of information" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(({'alfred'}, {'bob'}), ({'david', 'Yasmin', 'cindy'}, {'ellis'}))\n", + "(({'cindy'}, {'david'}), ({'ellis'}, {'bob', 'Xiang', 'alfred'}))\n" + ] + }, + { + "data": { + "text/html": [ + "
XiangYasminZoroRandyalfredbobcindydavidellis
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import toytree \n", + "\n", + "#define tree with simple Newick string\n", + "newick = \"((alfred,bob)Xiang,((cindy,david)Yasmin,ellis)Zoro)Randy;\" \n", + "tree = toytree.tree(newick) \n", + "\n", + "tree.draw('r'); #draw tree in R-style\n", + "\n", + "\n", + "for q in toytree.enum.iter_quadripartitions(tree, \n", + " include_internal_nodes=True, \n", + " feature=\"name\"):\n", + " print(q)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Little information" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(({0}, {1}), ({6}, {4}))\n", + "(({2}, {3}), ({7}, {4}))\n" + ] + }, + { + "data": { + "text/html": [ + "
XiangYasminZoroRandyalfredbobcindydavidellis
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import toytree \n", + "\n", + "#define tree with simple Newick string\n", + "newick = \"((alfred,bob)Xiang,((cindy,david)Yasmin,ellis)Zoro)Randy;\" \n", + "tree = toytree.tree(newick) \n", + "\n", + "tree.draw('r'); #draw tree in R-style\n", + "\n", + "\n", + "for q in toytree.enum.iter_quadripartitions(tree, \n", + " contract_partitions=True, \n", + " feature=\"idx\", \n", + " sort=True):\n", + " print(q)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Both of these quadripartition sets represent the same quadripartitions from the same tree, just expressed in different formats." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "toytree_practice", + "language": "python", + "name": "toytree_practice" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/mkdocs.yml b/mkdocs.yml index c99a807b..5557d4ce 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -113,7 +113,7 @@ nav: - .enum - tree enumeration: - bipartition methods: bipartitions.ipynb - quartet methods: quartets.ipynb - - quadripartition methods: ... + - quadripartition methods: quadripartitions.ipynb # - other: ... 
- .distance - tree/node dists: diff --git a/toytree/enum/src/quadripartitions.py b/toytree/enum/src/quadripartitions.py index 452d665c..03a33b68 100644 --- a/toytree/enum/src/quadripartitions.py +++ b/toytree/enum/src/quadripartitions.py @@ -422,34 +422,40 @@ def format_quadripartition( import toytree - newick = "((a,b)X,((c,d)Y,e)Z)R;" - tree = toytree.tree(newick).root("c") + # newick = "((a,b)X,((c,d)Y,e)Z)R;" + # tree = toytree.tree(newick).root("c") # tree = toytree.rtree.baltree(12, seed=1234).unroot() tree = toytree.tree("(a,b,((c,d)CD,(e,f)EF)X)AB;") + + results = iter_quadripartitions(tree, type=list, sort=True, include_internal_nodes=True) + + for item in results: + print(item[1][0][0]) + # tree._draw_browser(ts='r') # for bipart in _iter_quadripartition_sets(tree, include_internal_nodes=True): # for part in iter_quadripartitions(tree,): # print(part) # args to get consistently sorted biparts regardless of rooting - tree = tree.root("X") - for i in sorted(iter_quadripartitions(tree, type=set, sort=True)): - print(i) + # tree = tree.root("X") + # for i in sorted(iter_quadripartitions(tree, type=set, sort=True)): + # print(i) # [(('a', 'b'), ('c', 'd', 'e', 'f')), # (('c', 'd'), ('a', 'b', 'e', 'f')), # (('e', 'f'), ('a', 'b', 'c', 'd'))] - tree = tree.root("X") - for i in sorted(iter_quadripartitions(tree, sort=True, collapse=True)): - print(i) + # tree = tree.root("X") + # for i in sorted(iter_quadripartitions(tree, sort=True, collapse=True)): + # print(i) # convert consistent bipartitions to sets for easy comparison - x = set(tree.root('a').enum.iter_quadripartitions(type=tuple, sort=True)) - y = set(tree.root('e').enum.iter_quadripartitions(type=tuple, sort=True)) - assert x == y + # x = set(tree.root('a').enum.iter_quadripartitions(type=tuple, sort=True)) + # y = set(tree.root('e').enum.iter_quadripartitions(type=tuple, sort=True)) + # assert x == y # tree = toytree.rtree.unittree(6, seed=123).unroot() # c1, *_ = tree.draw('p') diff --git 
a/toytree/enum/tests/test_quadripartitions.py b/toytree/enum/tests/test_quadripartitions.py index 9023f463..42f4173c 100644 --- a/toytree/enum/tests/test_quadripartitions.py +++ b/toytree/enum/tests/test_quadripartitions.py @@ -2,6 +2,53 @@ """Test enumeration methods to find partitions in a tree. +args: +feature, +contract_partitions, (True, False) "con" +include_internal_nodes, (True, False) "int" +collapse, (True, False) "col" +type, (set, list, tuple) +sort (True, False) + +possible combinations of arguments: + +2*2*2*2*3 = 48 + +test_set (default) + test_set_sort + test_set_sort_int + test_set_sort_int_con + test_set_sort_int_con_col + test_set_sort_con + test_set_sort_con_col + test_set_int + test_set_int_con + test_set_int_con_col + +etc. + +These tests pick a subset of 16 possible combinations picked at random +(first three test defaults of each major type) +*reproducable tests will be checked for exact content + +test_default (set) +test_tuple +test_list +test_set_int_col +test_set_int_con_col +test_list_sort_int_con_col* +test_list_sort_con* +test_list_sort_con_col* +test_list_int +test_list_sort_int_col* +test_set_int +test_list_sort* +test_tuple_col +test_tuple_int +test_set_int_col +test_tuple_sort_int_col* + + """ import unittest @@ -10,111 +57,116 @@ from toytree.enum import iter_quadripartitions, _iter_quadripartition_sets -class TestQuadripartitions(unittest.TestCase): +class TestQuartets(unittest.TestCase): def setUp(self): - """Six tip tree three clades of two.""" + """Setting up test trees: + tree1: Six tip tree three clades of two. 
+ tree2: random six tip tree from seed for reproduceability + tree3: list of tree1 and tree2""" self.tree1 = toytree.tree("(a,b,((c,d)CD,(e,f)EF)X)AB;") - self.tree2 = self.tree1.root("a") - self.tree3 = self.tree1.root("a", "b") - self.trees = [self.tree1, self.tree2, self.tree3] - - def test_iter_quadripartitions_sets1(self): - """Quadripartitions """ - PARTS = [ - (({'c'}, {'d'}), ({'e', 'f'}, {'a', 'b'})), - (({'e'}, {'f'}), ({'c', 'd'}, {'a', 'b'})), - (({'c', 'd'}, {'e', 'f'}), ({'a'}, {'b'})), - ] - parts = sorted(_iter_quadripartition_sets(self.tree1)) - self.assertEqual(parts, PARTS) - - PARTS = [ - (({'c'}, {'d'}), ({'EF', 'f', 'e'}, {'AB', 'b', 'a'})), - (({'e'}, {'f'}), ({'c', 'd', 'CD'}, {'AB', 'b', 'a'})), - (({'c', 'd', 'CD'}, {'EF', 'f', 'e'}), ({'a'}, {'b'})), - ] - parts = sorted(_iter_quadripartition_sets(self.tree1, include_internal_nodes=True)) - self.assertEqual(parts, PARTS) - - PARTS = [ - (({'c'}, {'d'}), ({'EF'}, {'AB'})), - (({'e'}, {'f'}), ({'CD'}, {'AB'})), - (({'CD'}, {'EF'}), ({'a'}, {'b'})), - ] - parts = sorted(iter_edge_quadripartition_sets(self.tree1, contract_partitions=True)) - self.assertEqual(parts, PARTS) - - def test_iter_quadripartitions_sets2(self): - """Quadripartitions """ - PARTS = [ - (({'c'}, {'d'}), ({'f', 'e'}, {'a', 'b'})), - (({'e'}, {'f'}), ({'c', 'd'}, {'a', 'b'})), - (({'c', 'd'}, {'f', 'e'}), ({'b'}, {'a'})), - ] - parts = sorted(iter_edge_quadripartition_sets(self.tree2)) - self.assertEqual(parts, PARTS) - - PARTS = [ - (({'c'}, {'d'}), ({'f', 'e', 'EF'}, {'AB', 'b', 'a'})), - (({'e'}, {'f'}), ({'c', 'd', 'CD'}, {'AB', 'b', 'a'})), - (({'c', 'd', 'CD'}, {'f', 'e', 'EF'}), ({'b'}, {'a'})), - ] - parts = sorted(iter_edge_quadripartition_sets(self.tree2, include_internal_nodes=True)) - self.assertEqual(parts, PARTS) - - PARTS = [ - (({'c'}, {'d'}), ({'EF'}, {'AB'})), - (({'e'}, {'f'}), ({'CD'}, {'AB'})), - (({'CD'}, {'EF'}), ({'b'}, {'a'})), - ] - parts = sorted(iter_edge_quadripartition_sets(self.tree2, 
contract_partitions=True)) - self.assertEqual(parts, PARTS) - - def test_iter_quadripartitions_sets3(self): - """Quadripartitions """ - PARTS = [ - (({'c'}, {'d'}), ({'f', 'e'}, {'a', 'b'})), - (({'e'}, {'f'}), ({'d', 'c'}, {'a', 'b'})), - (({'a'}, {'b'}), ({'d', 'c'}, {'f', 'e'})), - ] - parts = sorted(iter_edge_quadripartition_sets(self.tree3)) - self.assertEqual(parts, PARTS) - - PARTS = [ - (({'c'}, {'d'}), ({'f', 'e', 'EF'}, {'AB', 'a', 'b'})), - (({'e'}, {'f'}), ({'CD', 'd', 'c'}, {'AB', 'a', 'b'})), - (({'a'}, {'b'}), ({'CD', 'd', 'c'}, {'f', 'e', 'EF'})), - ] - parts = sorted(iter_edge_quadripartition_sets(self.tree3, include_internal_nodes=True)) - self.assertEqual(parts, PARTS) - - PARTS = [ - (({'c'}, {'d'}), ({'EF'}, {'AB'})), - (({'e'}, {'f'}), ({'CD'}, {'AB'})), - (({'a'}, {'b'}), ({'CD'}, {'EF'})), - ] - parts = sorted(iter_edge_quadripartition_sets(self.tree3, contract_partitions=True)) - self.assertEqual(parts, PARTS) - - def test_iter_quadripartitions_unrooted(self): - """Quadripartitions """ - for tree in self.trees: - parts = sorted(iter_quadripartitions(tree)) - PARTS = [ - (('a', 'b'), ('c', 'e')), - (('a', 'b'), ('c', 'f')), - (('a', 'b'), ('d', 'e')), - (('a', 'b'), ('d', 'f')), - (('a', 'c'), ('e', 'f')), - (('a', 'd'), ('e', 'f')), - (('a', 'e'), ('c', 'd')), - (('a', 'f'), ('c', 'd')), - (('b', 'c'), ('e', 'f')), - (('b', 'd'), ('e', 'f')), - (('b', 'e'), ('c', 'd')), - (('b', 'f'), ('c', 'd')), - ] - self.assertEqual(parts, PARTS) + self.tree2 = toytree.rtree.unittree(6, seed=123, random_names=True) + self.trees = [self.tree1, self.tree2] + + def _test_sorting(self, results, collapse: bool = False, sort: bool = False): + """Helper function to test sorting of quadripartitions.""" + for item in results: + if not isinstance(item[0], set): + if collapse: + if sort: + if item[0][0] > item[1][0]: #if the first node of each collection is out of order, then make sure it is in length order + self.assertGreater(len(item[1]), len(item[0])) + + 
else: + if sort: + self.assertGreater(item[0][1][0], item[0][0][0]) #quartet-level sorting (x,y),(i,j) -> (i,j),(x,y) + + + def test_default(self): #testing default behavior (set) + results = list(iter_quadripartitions(self.tree1)) + self.assertIsInstance(results[0][0], tuple) + self.assertIsInstance(results[0][0][0], set) + self._test_sorting(results) + def test_tuple(self): + results = list(iter_quadripartitions(self.tree1, type=tuple)) + self.assertIsInstance(results[0][0], tuple) + self.assertIsInstance(results[0][0][0], tuple) + self._test_sorting(results) + def test_list(self): + results = list(iter_quadripartitions(self.tree1, type=list)) + self.assertIsInstance(results[0][0], tuple) + self.assertIsInstance(results[0][0][0], list) + self._test_sorting(results) + def test_set_int_col(self): + results = list(iter_quadripartitions(self.tree1, include_internal_nodes=True, collapse=True)) + self.assertIsInstance(results[0][0], tuple) + self.assertIsInstance(results[0][0][0], set) + self._test_sorting(results, collapse=True) + def test_set_int_con_col(self): + results = list(iter_quadripartitions(self.tree1, include_internal_nodes=True, contract_partitions=True, collapse=True)) + self.assertIsInstance(results[0][0], set) + self._test_sorting(results, collapse=True) + def test_list_sort_int_con_col(self): + results = sorted(iter_quadripartitions(self.tree1, type=list, sort=True, include_internal_nodes=True, contract_partitions=True, collapse=True)) + RESULTS = [(['CD'], ['EF'], ['a'], ['b']), (['CD'], ['X'], ['e'], ['f']), (['EF'], ['X'], ['c'], ['d'])] + self.assertIsInstance(results[0][0], list) + self._test_sorting(results, collapse=True, sort=True) + self.assertEqual(results, RESULTS) + self._test_sorting(results, collapse=True, sort=True) + def test_list_sort_con(self): + results = sorted(iter_quadripartitions(self.tree1, type=list, sort=True, contract_partitions=True)) + RESULTS = [((['CD'], ['EF']), (['a'], ['b'])), ((['CD'], ['X']), (['e'], ['f'])), 
((['EF'], ['X']), (['c'], ['d']))] + self.assertIsInstance(results[0][0], tuple) + self.assertIsInstance(results[0][0][0], list) + self._test_sorting(results, sort=True) + self.assertEqual(results,RESULTS) + def test_list_sort_con_col(self): + results = sorted(iter_quadripartitions(self.tree1, type=list, sort=True, contract_partitions=True, collapse=True)) + RESULTS = [(['CD'], ['EF'], ['a'], ['b']), (['CD'], ['X'], ['e'], ['f']), (['EF'], ['X'], ['c'], ['d'])] + self.assertIsInstance(results[0][0], list) + self._test_sorting(results, collapse=True, sort=True) + self.assertEqual(results,RESULTS) + def test_list_int(self): + results = list(iter_quadripartitions(self.tree1, type=list, include_internal_nodes=True)) + self.assertIsInstance(results[0][0], tuple) + self.assertIsInstance(results[0][0][0], list) + self._test_sorting(results) + def test_list_sort_int_col(self): + results = sorted(iter_quadripartitions(self.tree1, type=list, sort=True, include_internal_nodes=True, collapse=True)) + RESULTS = [(['a'], ['b'], ['c', 'd', 'CD'], ['e', 'f', 'EF']), (['c'], ['d'], ['a', 'b'], ['e', 'f', 'EF']), (['e'], ['f'], ['a', 'b'], ['c', 'd', 'CD'])] + self.assertIsInstance(results[0][0], list) + self._test_sorting(results, collapse=True, sort=True) + self.assertEqual(results,RESULTS) + def test_set_int(self): + results = list(iter_quadripartitions(self.tree1, include_internal_nodes=True)) + self.assertIsInstance(results[0][0], tuple) + self.assertIsInstance(results[0][0][0], set) + self._test_sorting(results) + def test_list_sort(self): + results = sorted(iter_quadripartitions(self.tree1, type=list, sort=True)) + RESULTS = [((['a'], ['b']), (['c', 'd'], ['e', 'f'])), ((['c'], ['d']), (['a', 'b'], ['e', 'f'])), ((['e'], ['f']), (['a', 'b'], ['c', 'd']))] + self.assertIsInstance(results[0][0], tuple) + self.assertIsInstance(results[0][0][0], list) + self._test_sorting(results, sort=True) + self.assertEqual(results,RESULTS) + def test_tuple_col(self): + results = 
list(iter_quadripartitions(self.tree1, type=tuple, collapse=True)) + self.assertIsInstance(results[0][0], tuple) + self._test_sorting(results, collapse=True) + def test_tuple_int(self): + results = list(iter_quadripartitions(self.tree1, type=tuple, include_internal_nodes=True)) + self.assertIsInstance(results[0][0], tuple) + self.assertIsInstance(results[0][0][0], tuple) + self._test_sorting(results) + def test_set_int_col(self): + results = list(iter_quadripartitions(self.tree1, include_internal_nodes=True, collapse=True)) + self.assertIsInstance(results[0][0], set) + self._test_sorting(results, collapse=True) + def test_tuple_sort_int_col(self): + results = sorted(iter_quadripartitions(self.tree1, type=tuple, sort=True, include_internal_nodes=True, collapse=True)) + RESULTS = [(('a',), ('b',), ('c', 'd', 'CD'), ('e', 'f', 'EF')), (('c',), ('d',), ('a', 'b'), ('e', 'f', 'EF')), (('e',), ('f',), ('a', 'b'), ('c', 'd', 'CD'))] + self.assertIsInstance(results[0][0], tuple) + self._test_sorting(results, collapse=True, sort=True) + self.assertEqual(results, RESULTS) if __name__ == "__main__":