From d3fa320ba3b964f7dd853e9f36d061251c6041b9 Mon Sep 17 00:00:00 2001
From: Eduardo Rodrigues <eduardo.rodrigues@cern.ch>
Date: Tue, 25 Aug 2020 09:09:20 +0200
Subject: [PATCH] Replace Particle.dump_table with Particle.to_dict to remove
 strong coupling in that method (#261)

* replace Particle.dump_table with .to_dict and .to_list

* Adapt test accordingly

* Much better / more correct implementation of to_dict, fixed method doc

* Please Black formatting - why did I forget to test this locally!?

* Fix particle.py for MyPy

* Fix unclosed file warnings in particle/convert.py

* Skip doctest'ing a couple of bits in particle.py's doc

* Fix unclosed file warnings in particle/convert.py

* Fix particle.py for MyPy

* Remove tabulate from dependencies

* Only close file if opened
---
 .ci/azure-steps.yml               |   1 +
 .github/workflows/ci.yml          |   2 +-
 docs/CHANGELOG.md                 |   4 +-
 setup.cfg                         |   5 +-
 src/particle/particle/convert.py  |   4 +
 src/particle/particle/particle.py | 205 +++++++++++++++++++++---------
 tests/particle/test_particle.py   |  16 +--
 7 files changed, 162 insertions(+), 75 deletions(-)

diff --git a/.ci/azure-steps.yml b/.ci/azure-steps.yml
index 54dbdbdf..1bcc7c8d 100644
--- a/.ci/azure-steps.yml
+++ b/.ci/azure-steps.yml
@@ -7,6 +7,7 @@ steps:
 - script: |
     python -m pip install --upgrade pip
     python -m pip install --upgrade pytest
+    python -m pip install tabulate
     python -m pip install pytest-cov
   displayName: 'Install dependencies'
 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ddd77c10..21a73d8b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -113,7 +113,7 @@ jobs:
       run: python setup.py sdist
 
     - name: Install requirements
-      run: python -m pip install attrs hepunits tabulate importlib_resources --target src
+      run: python -m pip install attrs hepunits importlib_resources --target src
 
     - name: Make ZipApp
       run: python -m zipapp -c -p "/usr/bin/env python3" -m "particle.__main__:main" -o ../particle.pyz .
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 7e7bf9cc..1e70bf04 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -7,6 +7,8 @@ Version 0.12.0
 In preparation.
 
 - `Particle` class:
+  - `Particle.dump_table()` method removed and replaced with methods
+    `Particle.to_dict()` and `Particle.to_list()` (avoids strong coupling of packages).
   - Improve LaTeX particle names with `\prime` in them,
     to have correct HTML names for such particles.
 - `PDGID` class:
@@ -28,7 +30,7 @@ In preparation.
   - Demo notebook slightly extended.
   - More explanations in the functions qualifying PDG IDs.
 - Miscellaneous:
-  - LaTeX-to-HTML particle name conversion function fixed to Correctly
+  - LaTeX-to-HTML particle name conversion function fixed to correctly
     deal with names containing `\prime` and/or `\tilde`.
 
 
diff --git a/setup.cfg b/setup.cfg
index 1a117e58..6d32d811 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -7,7 +7,7 @@ author = Eduardo Rodrigues
 author_email = eduardo.rodrigues@cern.ch
 maintainer = The Scikit-HEP admins
 maintainer_email = scikit-hep-admins@googlegroups.com
-description = PDG particle data and identification codes
+description = Extended PDG particle data and MC identification codes
 long_description = file: README.rst
 url = https://github.com/scikit-hep/particle
 license = BSD 3-Clause License
@@ -60,15 +60,12 @@ where=src
 test =
     pytest
     pandas; python_version>"3.4"
-    tabulate
 dev =
     pandas; python_version>"3.4"
-    tabulate
     check-manifest>=0.39
     black==19.10b0
 all =
     pandas; python_version>"3.4"
-    tabulate
     check-manifest>=0.39
     black==19.10b0
 
diff --git a/src/particle/particle/convert.py b/src/particle/particle/convert.py
index 118dd2e3..ada6368f 100644
--- a/src/particle/particle/convert.py
+++ b/src/particle/particle/convert.py
@@ -109,6 +109,10 @@ def filter_file(fileobject):
         if not line.lstrip("\ufeff").lstrip().startswith("*"):
             stream.write(line)
     stream.seek(0)
+
+    if not fileobject.closed:
+        fileobject.close()
+
     return stream
 
 
diff --git a/src/particle/particle/particle.py b/src/particle/particle/particle.py
index 2f24996f..9dcae19d 100644
--- a/src/particle/particle/particle.py
+++ b/src/particle/particle/particle.py
@@ -279,24 +279,20 @@ def all(cls):
         return cls._table if cls._table is not None else set()
 
     @classmethod
-    def dump_table(
+    def to_list(
         cls,
         exclusive_fields=(),  # type: Iterable[str]
         exclude_fields=(),  # type: Iterable[str]
         n_rows=-1,
         filter_fn=None,  # type: Optional[Callable[[Particle], bool]]
-        filename=None,  # type: Optional[str]
-        tablefmt="simple",
-        floatfmt=".12g",
-        numalign="decimal",
     ):
-        # type: (...) -> Optional[str]
+        # type: (...) -> List[List[Any]]
         """
-        Dump the internal particle data CSV table,
-        loading it from the default location if no table has yet been loaded.
+        Render a search (via `findall`) on the internal particle data CSV table
+        as a `list`, loading the table from the default location if no table has yet been loaded.
 
-        The table attributes are those of the class. By default all attributes
-        are used as table fields. Their complete list is:
+        The returned attributes are those of the class. By default all attributes
+        are used as fields. Their complete list is:
             pdgid
             pdg_name
             mass
@@ -316,12 +312,14 @@ def dump_table(
             quarks
             latex_name
 
-        Optionally dump to a file.
+        It is possible to add as returned fields any `Particle` class property,
+        e.g. `name`, `J` or `ctau`, see examples below.
 
         Parameters
         ----------
         exclusive_fields: list, optional, default is []
-            Exclusive list of fields to print out.
+            Exclusive list of fields to print out,
+            which can be any `Particle` class property.
         exclude_fields: list, optional, default is []
             List of table fields to exclude in the printout.
             Relevant only when exclusive_fields is not given.
@@ -329,40 +327,53 @@ def dump_table(
             Number of table rows to print out.
         filter_fn: function, optional, default is None
             Apply a filter to each particle.
-            See findall(...) for typical use cases.
-        filename: str, optional, default is None
-            Name of file where to dump the table.
-            By default the table is dumped to stdout.
-        tablefmt: str, optional, default is 'simple'
-            Table formatting option, see the tabulate's package
-            tabulate function for a description of available options.
-            The most common options are:
-            'plain', 'simple', 'grid', 'rst', 'html', 'latex'.
-        floatfmt: str, optional, default is '.12g'
-            Number formatting, see the tabulate's package
-            tabulate function for a description of available options.
-        numalign: str or None, oprional, default is 'decimal'
-            Column alignment for numbers, see the tabulate's package
-            tabulate function for a description of available options.
+            See `findall(...)` for typical use cases.
 
         Returns
         -------
-        str or None if filename is None or not, respectively.
+        The particle table query as a `list`.
 
         Note
         ----
-        Uses the `tabulate` package.
+        The `tabulate` package is suggested as a means to print-out
+        the contents of the query as a nicely formatted table.
 
         Examples
         --------
-        print(Particle.dump_table())
-        print(Particle.dump_table(n_rows=5))
-        print(Particle.dump_table(exclusive_fields=['pdgid', 'pdg_name']))
-        print(Particle.dump_table(filter_fn=lambda p: p.pdgid.has_bottom))
-        Particle.dump_table(filename='output.txt', tablefmt='rst')
-        """
-        from tabulate import tabulate
+        Reproduce the whole particle table kept internally:
+
+        >>> Particle.to_list()    # doctest: +SKIP
+
+        Reduce the information on the particle table to the only fields
+        ['pdgid', 'pdg_name'] and render the first 5 particles:
+
+        >>> query_as_list = Particle.to_list(exclusive_fields=['pdgid', 'pdg_name'], n_rows=5)
+        >>> from tabulate import tabulate
+        >>> print(tabulate(query_as_list, headers='firstrow'))    # doctest: +SKIP
+
+        Request the properties of a specific list of particles:
+
+        >>> query_as_list = Particle.to_list(filter_fn=lambda p: p.pdgid.is_lepton and p.charge!=0, exclusive_fields=['pdgid', 'name', 'mass', 'charge'])
+
+        >>> print(tabulate(query_as_list, headers='firstrow', tablefmt="rst", floatfmt=".12g", numalign="decimal"))
+        =======  ======  ===============  ========
+          pdgid  name               mass    charge
+        =======  ======  ===============  ========
+             11  e-         0.5109989461        -1
+            -11  e+         0.5109989461         1
+             13  mu-      105.6583745           -1
+            -13  mu+      105.6583745            1
+             15  tau-    1776.86                -1
+            -15  tau+    1776.86                 1
+             17  tau'-                          -1
+            -17  tau'+                           1
+        =======  ======  ===============  ========
+
+        Save it to a file:
 
+        >>> with open('particles.txt', "w") as outfile:    # doctest: +SKIP
+        ...    print(tabulate(query_as_list, headers='firstrow', tablefmt="rst", floatfmt=".12g", numalign="decimal"), file=outfile)    # doctest: +SKIP
+        """
         if not cls.table_loaded():
             cls.load_table()
 
@@ -387,37 +398,113 @@ def dump_table(
         if filter_fn is not None:
             tbl_all = cls.findall(filter_fn)
 
-        # In any case, only dump a given number of rows?
+        # In any case, only keep a given number of rows?
         if n_rows >= 0:
             tbl_all = tbl_all[:n_rows]
 
         # Build all table rows
         tbl = []
+        tbl.append(tbl_names)
         for p in tbl_all:
             tbl.append([getattr(p, attr) for attr in tbl_names])
 
-        if filename:
-            filename = str(filename)  # Conversion to handle pathlib on Python < 3.6
-            with open(filename, "w") as outfile:
-                print(
-                    tabulate(
-                        tbl,
-                        headers=tbl_names,
-                        tablefmt=tablefmt,
-                        floatfmt=floatfmt,
-                        numalign=numalign,
-                    ),
-                    file=outfile,
-                )
-            return None
-        else:
-            return tabulate(
-                tbl,
-                headers=tbl_names,
-                tablefmt=tablefmt,
-                floatfmt=floatfmt,
-                numalign=numalign,
-            )
+        return tbl
+
+    @classmethod
+    def to_dict(cls, *args, **kwargs):
+        # type: (...) -> Dict[str, Any]
+        """
+        Render a search (via `findall`) on the internal particle data CSV table
+        as a `dict`, loading the table from the default location if no table has yet been loaded.
+
+        The returned attributes are those of the class. By default all attributes
+        are used as fields. Their complete list is:
+            pdgid
+            pdg_name
+            mass
+            mass_upper
+            mass_lower
+            width
+            width_upper
+            width_lower
+            three_charge
+            I
+            G
+            P
+            C
+            anti_flag
+            rank
+            status
+            quarks
+            latex_name
+
+        It is possible to add as returned fields any `Particle` class property,
+        e.g. `name`, `J` or `ctau`, see examples below.
+
+        Parameters
+        ----------
+        exclusive_fields: list, optional, default is []
+            Exclusive list of fields to print out,
+            which can be any `Particle` class property.
+        exclude_fields: list, optional, default is []
+            List of table fields to exclude in the printout.
+            Relevant only when exclusive_fields is not given.
+        n_rows: int, optional, defaults to all rows
+            Number of table rows to print out.
+        filter_fn: function, optional, default is None
+            Apply a filter to each particle.
+            See `findall(...)` for typical use cases.
+
+        Returns
+        -------
+        The particle table query as a `dict` mapping each field name to a tuple of values.
+
+        Note
+        ----
+        The `tabulate` package is suggested as a means to print-out
+        the contents of the query as a nicely formatted table.
+
+        Examples
+        --------
+        Reproduce the whole particle table kept internally:
+
+        >>> Particle.to_dict()    # doctest: +SKIP
+
+        Reduce the information on the particle table to the only fields
+        ['pdgid', 'pdg_name'] and render the first 5 particles:
+
+        >>> query_as_dict = Particle.to_dict(exclusive_fields=['pdgid', 'pdg_name'], n_rows=5)
+        >>> from tabulate import tabulate    # doctest: +SKIP
+        >>> print(tabulate(query_as_dict, headers='keys'))    # doctest: +SKIP
+
+        Request the properties of a specific list of particles:
+
+        >>> query_as_dict = Particle.to_dict(filter_fn=lambda p: p.pdgid.is_lepton and p.charge!=0, exclusive_fields=['pdgid', 'name', 'mass', 'charge'])
+
+        >>> print(tabulate(query_as_dict, headers='keys', tablefmt="rst", floatfmt=".12g", numalign="decimal"))    # doctest: +SKIP
+        =======  ======  ===============  ========
+          pdgid  name               mass    charge
+        =======  ======  ===============  ========
+             11  e-         0.5109989461        -1
+            -11  e+         0.5109989461         1
+             13  mu-      105.6583745           -1
+            -13  mu+      105.6583745            1
+             15  tau-    1776.86                -1
+            -15  tau+    1776.86                 1
+             17  tau'-                          -1
+            -17  tau'+                           1
+        =======  ======  ===============  ========
+
+        Save it to a file:
+
+        >>> with open('particles.txt', "w") as outfile:    # doctest: +SKIP
+        ...    print(tabulate(query_as_dict, headers='keys', tablefmt="rst", floatfmt=".12g", numalign="decimal"), file=outfile)    # doctest: +SKIP
+        """
+        query_as_list = cls.to_list(*args, **kwargs)
+        headers, rows = query_as_list[0], query_as_list[1:]
+        # zip(*[]) yields nothing, so build each column explicitly: a query
+        # matching no particle then still maps every field to an empty tuple.
+        return {name: tuple(row[i] for row in rows) for i, name in enumerate(headers)}
 
     @classmethod
     def load_table(cls, filename=None, append=False, _name=None):
diff --git a/tests/particle/test_particle.py b/tests/particle/test_particle.py
index 3ab51106..10655a6b 100644
--- a/tests/particle/test_particle.py
+++ b/tests/particle/test_particle.py
@@ -535,29 +535,25 @@ def test_default_particle():
     assert p.status == Status.NotInPDT
 
 
-def test_dump_table():
-    tbl = Particle.dump_table(
+def test_to_list():
+    tbl = Particle.to_list(
         filter_fn=lambda p: p.pdgid.is_meson
         and p.pdgid.has_strange
         and p.ctau > 1 * meter,
         exclusive_fields=["pdgid", "name"],
     )
-    assert (
-        tbl
-        == "  pdgid  name\n-------  ------\n    130  K(L)0\n    321  K+\n   -321  K-"
-    )
+    assert tbl == [["pdgid", "name"], [130, "K(L)0"], [321, "K+"], [-321, "K-"]]
 
-    tbl = Particle.dump_table(
+    tbl = Particle.to_list(
         filter_fn=lambda p: p.pdgid > 0
         and p.pdgid.is_meson
         and p.pdgid.has_strange
         and p.pdgid.has_charm,
         exclusive_fields=["name"],
         n_rows=2,
-        tablefmt="html",
     )
-
-    assert "<td>D(s)+ </td></tr>\n<tr><td>D(s)*+</td>" in tbl
+    assert ["D(s)+"] in tbl
+    assert ["D(s)*+"] in tbl
 
 
 ampgen_style_names = (