Merge pull request #7 from MethodicalAcceleratorDesign/pandas

Add Conversion to Dataframes
MethodicalAcceleratorDesign · Oct 18, 2023 · f2ebac4 · f2ebac4
2 parents ea118d0 + 390fc13
commit f2ebac4
Show file tree

Hide file tree

Showing 11 changed files with 218 additions and 40 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,7 @@
+0.4.2 (2023/10/18)
+
+Add `to_df` method to objects, allowing for easy conversion to pandas dataframes. \
+
 0.4.1 (2023/08/19)
 
 Change the way `send_vars` and `recv_vars` work, they now use kwargs and args respectively. \

diff --git a/docs/source/dataframes.rst b/docs/source/dataframes.rst
@@ -0,0 +1,12 @@
+Converting TFS tables to Pandas DataFrames
+------------------------------------------
+
+`pandas` is an optional dependency of PyMAD-NG. When it is installed, the ``to_df`` method converts TFS tables (called ``mtable`` in MAD-NG) to a `pandas` ``DataFrame``, or to a ``TfsDataFrame`` if you also have `tfs-pandas` installed. In the example below, we generate two ``mtable`` objects by doing a survey and a twiss on the Proton Synchrotron lattice, and then convert them to ``DataFrame`` (or ``TfsDataFrame``) objects.
+
+.. literalinclude:: ../../examples/ex-ps-twiss/ps-twiss.py
+ :lines: 18, 24, 41-49
+ :linenos:
+
+In this script, we create the variables ``srv`` and ``mtbl``, which are ``mtable`` objects created by ``survey`` and ``twiss`` respectively. First, we convert ``mtbl`` to a ``DataFrame`` and print it. We then check whether `tfs-pandas` is installed: if it is not, the header of the TFS table is stored in the ``attrs`` attribute of the ``DataFrame`` and has to be printed explicitly, whereas with `tfs-pandas` the header is printed automatically. Finally, we convert ``srv`` to a ``DataFrame`` and print it.
+
+Note: ``to_df`` is available to call on all ``object`` types in MAD-NG, but it will raise a ``TypeError`` if your object is not an ``mtable``.
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -16,6 +16,7 @@ Welcome to the documentation for PyMAD-NG!
  ex-managing-refs
  ex-fodo
  ex-lhc-couplingLocal
+ dataframes
  ex-recv-lhc
  examples
 

diff --git a/docs/source/modules.rst b/docs/source/modules.rst
@@ -1,7 +1,15 @@
 API Reference
 =============
 
-.. toctree::
- :maxdepth: 4
+PyMAD-NG Module contents
+------------------------
 
- pymadng
+.. automodule:: pymadng
+ :members:
+ :undoc-members:
+ :show-inheritance:
+
+Useful functions for MAD References
+-----------------------------------
+
+.. autofunction:: pymadng.madp_classes.madhl_obj.to_df
diff --git a/docs/source/pymadng.rst b/docs/source/pymadng.rst
diff --git a/examples/ex-ps-twiss/ps-twiss.py b/examples/ex-ps-twiss/ps-twiss.py
@@ -1,9 +1,6 @@
-import time
+import os, time, pandas
 from pymadng import MAD
 
-import numpy as np
-import matplotlib.pyplot as plt
-import os
 orginal_dir = os.getcwd()
 os.chdir(os.path.dirname(os.path.realpath(__file__)))
 
@@ -37,8 +34,18 @@
  mad.mtbl.write("'PS_twiss_py.tfs'",
  mad.py_strs_to_mad_strs(
  ["name", "kind", "s", "x", "px", "beta11", "alfa11", "beta22", "alfa22","dx",
- "dpx", "mu1", "mu2", "l", "angle", "k0l", "k1l", "k2l", "k3l", "hkick", "vkick"]),
- ).eval()
- #.eval() so tws:write() can be finished before MAD is shutdown
+ "dpx", "mu1", "mu2", "l", "angle", "k0l", "k1l", "k2l", "k3l", "hkick", "vkick"]
+ )
+ )
+
+ df = mad.mtbl.to_df()
+ print(df)
+ try:
+ import tfs
+ except ImportError:
+ print("tfs-pandas not installed, so the header is stored in attrs instead of headers")
+ print(df.attrs)
+
+ print(mad.srv.to_df())
 
 os.chdir(orginal_dir)
diff --git a/pyproject.toml b/pyproject.toml
@@ -36,3 +36,7 @@ where = ["src"]
 
 [tool.setuptools.dynamic]
 version = {attr = "pymadng.__version__"}
+
+[project.optional-dependencies]
+pandas = ["pandas>=1.0,<2.1.0"]
+tfs = ["tfs-pandas>3.0.0"]
diff --git a/src/pymadng/__init__.py b/src/pymadng/__init__.py
@@ -1,7 +1,7 @@
 from .madp_object import MAD
 
 __title__ = "pymadng"
-__version__ = "0.4.1"
+__version__ = "0.4.2"
 
 __summary__ = "Python interface to MAD-NG running as subprocess"
 __uri__ = "https://github.com/MethodicalAcceleratorDesign/MADpy"

diff --git a/src/pymadng/madp_classes.py b/src/pymadng/madp_classes.py
@@ -87,16 +87,12 @@ def __dir__(self) -> Iterable[str]:
  name = self._name
  if name[:5] == "_last":
  name = name + ".__metatable or " + name
- script = f"""
+ self._mad.psend(f"""
  local modList={{}}; local i = 1;
  for modname, mod in pairs({name}) do modList[i] = modname; i = i + 1; end
  {self._mad.py_name}:send(modList)
- """
- self._mad.psend(script)
- varnames = [
- x for x in self._mad.recv() if isinstance(x, str) and x[0] != "_"
- ]
- return varnames
+ """)
+ return [x for x in self._mad.recv() if isinstance(x, str) and x[0] != "_"]
 
 
 class madhl_obj(madhl_ref):
@@ -108,9 +104,7 @@ def __dir__(self) -> Iterable[str]:
  varnames = self._mad.precv(f"{self._name}:get_varkeys(MAD.object, false)")
 
  if not self._mad.ipython_use_jedi:
- varnames.extend(
- [x + "()" for x in self._mad.recv() if not x in varnames]
- )
+ varnames.extend([x + "()" for x in self._mad.recv() if not x in varnames])
  return varnames
 
  def __call__(self, *args, **kwargs):
@@ -136,16 +130,96 @@ def __next__(self):
  except IndexError:
  raise StopIteration
 
+ def to_df(self, columns: list = None):
+ """Converts the object to a pandas dataframe.
+
+ This function imports pandas and tfs-pandas, if tfs-pandas is not installed, it will only return a pandas dataframe.
+
+ Args:
+ columns (list, optional): List of columns to include in the dataframe. Defaults to None.
+
+ Returns:
+ pandas.DataFrame or tfs.TfsDataFrame: The dataframe containing the object's data.
+ """
+ if not self._mad.precv(f"MAD.typeid.is_mtable({self._name})"):
+ raise TypeError("Object is not a table, cannot convert to dataframe")
+
+ import pandas as pd
+
+ try:
+ import tfs
+
+ DataFrame, header = tfs.TfsDataFrame, "headers"
+ except ImportError:
+ DataFrame, header = pd.DataFrame, "attrs"
+
+ py_name, obj_name = self._mad.py_name, self._name
+ self._mad.send( # Sending every value individually is slow (sending vectors is fast)
+ f"""
+-- Get the column names 
+colnames = {obj_name}:colnames()
+{py_name}:send(colnames)
+
+-- Loop through all the column names and send them with their data
+for i, colname in ipairs(colnames) do
+ local col = {obj_name}:getcol(colname)
+
+ -- If the column is not a vector and has a metatable, then convert it to a table (reference or generator columns)
+ if not MAD.typeid.is_vector(col) and getmetatable(col) then
+ local tbl = table.new(#col, 0)
+ conv_to_vec = true
+ for i, val in ipairs(col) do 
+ tbl[i] = val 
+ -- From testing, checking if I can convert to a vector is faster than sending the table
+ conv_to_vec = conv_to_vec and MAD.typeid.is_number(val)
+ end
+ col = conv_to_vec and MAD.vector(tbl) or tbl
+ end
+
+ -- Send the column data
+ {py_name}:send(col)
+end
+
+-- Get the header names and send the count
+local header = {obj_name}.header
+{py_name}:send(header)
+
+-- Loop through all the header names and send them
+for i, attr in ipairs(header) do 
+ {py_name}:send({obj_name}[attr])
+end
+"""
+ )
+ # Create the dataframe from the data sent
+ colnames = self._mad.recv()
+ full_tbl = { # The string is in case references are within the table
+ col: self._mad.recv(f"{obj_name}:getcol('{col}')") for col in colnames
+ }
+
+ # Not keen on the .squeeze() but it works (ng always sends 2D arrays, but I need the columns in 1D)
+ for key, val in full_tbl.items():
+ if isinstance(val, np.ndarray):
+ full_tbl[key] = val.squeeze()
+ df = DataFrame(full_tbl)
+
+ if columns:
+ df = df[columns] # Only keep the columns specified
+
+ # Get the header and add it to the dataframe
+ hnams = self._mad.recv()
+ setattr(df, header,
+ {hnam: self._mad.recv(f"{obj_name}['{hnam}']") for hnam in hnams}
+ )
+ return df
+
 
 class madhl_fun(madhl_ref):
  # ----------------------------------Calling/Creating functions--------------------------------------#
  def __call_func(self, funcName: str, *args):
  """Call the function funcName and store the result in ``_last``."""
  rtrn_ref = madhl_reflast(self._mad)
  args_string, vars_to_send = get_args_string(self._mad.py_name, *args)
- self._mad.send(
- f"{rtrn_ref._name} = __mklast__({funcName}({args_string}))\n"
- )
+ self._mad.send(f"{rtrn_ref._name} = __mklast__({funcName}({args_string}))\n")
  for var in vars_to_send:
  self._mad.send(var)
  return rtrn_ref
@@ -181,9 +255,9 @@ class madhl_last: # The init and del for a _last object
  def __init__(self, mad_proc: mad_process):
  self._mad = mad_proc
  self._lst_cntr = mad_proc.lst_cntr
- self._lastnum  = mad_proc.lst_cntr.get()
- self._name  = f"_last[{self._lastnum}]"
- self._parent  =  "_last"
+ self._lastnum = mad_proc.lst_cntr.get()
+ self._name = f"_last[{self._lastnum}]"
+ self._parent = "_last"
 
  def __del__(self):
  self._lst_cntr.set(self._lastnum)

diff --git a/src/pymadng/madp_object.py b/src/pymadng/madp_object.py
@@ -14,8 +14,8 @@
 # TODO: Make it so that MAD does the loop for variables not python (speed)
 # TODO: Review recv_and exec:
 """
-Default arguments are evaluated once at module load time. 
-This may cause problems if the argument is a mutable object such as a list or a dictionary. 
+Default arguments are evaluated once at module load time.
+This may cause problems if the argument is a mutable object such as a list or a dictionary.
 If the function modifies the object (e.g., by appending an item to a list), the default value is modified.
 Source: https://google.github.io/styleguide/pyguide.html
 """

diff --git a/tests/obj_tests.py b/tests/obj_tests.py
@@ -1,4 +1,4 @@
-import unittest, os, time
+import unittest, os, time, sys, tfs, pandas
 
 from pymadng import MAD
 from pymadng.madp_classes import madhl_ref, madhl_obj, madhl_fun
@@ -299,6 +299,84 @@ def test_dir(self):
  self.assertEqual(dir(mad.quadrupole(knl=[0, 0.3], l = 1)), quad_exp) #Dir of instance of class should be the same as the class
  self.assertEqual(dir(mad.quadrupole(asd = 10, qwe = 20)), sorted(quad_exp + ["asd", "qwe"])) #Adding to the instance should change the dir
 
+class TestDataFrame(unittest.TestCase):
+
+ def generalDataFrame(self, headers, DataFrame):
+ mad = MAD()
+ mad.send("""
+test = mtable{
+ {"string"}, "number", "integer", "complex", "boolean", "list", "table", "range",! "generator",
+ name = "test",
+ header = {"string", "number", "integer", "complex", "boolean", "list", "table", "range"},
+ string = "string",
+ number = 1.234567890,
+ integer = 12345670,
+ complex = 1.3 + 1.2i,
+ boolean = true,
+ list = {1, 2, 3, 4, 5},
+ table = {1, 2, ["key"] = "value"},
+ range = 1..11,
+}
+ + {"a", 1.1, 1, 1 + 2i, true , {1, 2 }, {1 , 2 , ["3" ] = 3 }, 1..11,}
+ + {"b", 2.2, 2, 2 + 3i, false, {3, 4 }, {4 , 5 , ["6" ] = 6 }, 2..12,}
+ + {"c", 3.3, 3, 3 + 4i, true , {5, 6 }, {7 , 8 , ["9" ] = 9 }, 3..13,}
+ + {"d", 4.4, 4, 4 + 5i, false, {7, 8 }, {10, 11, ["12"] = 12}, 4..14,}
+ + {"e", 5.5, 5, 5 + 6i, true , {9, 10}, {13, 14, ["15"] = 15}, 5..15,}
+
+test:addcol("generator", \\ri, m -> m:getcol("number")[ri] + 1i * m:getcol("number")[ri])
+test:write("test")
+ """
+ )
+ df = mad.test.to_df()
+ self.assertTrue(isinstance(df, DataFrame))
+ self.assertEqual(getattr(df, headers)["name"], "test")
+ self.assertEqual(getattr(df, headers)["string"], "string")
+ self.assertEqual(getattr(df, headers)["number"], 1.234567890)
+ self.assertEqual(getattr(df, headers)["integer"], 12345670)
+ self.assertEqual(getattr(df, headers)["complex"], 1.3 + 1.2j)
+ self.assertEqual(getattr(df, headers)["boolean"], True)
+ self.assertEqual(getattr(df, headers)["list"], [1, 2, 3, 4, 5])
+ lst, hsh = getattr(df, headers)["table"]
+ self.assertEqual(lst, [1, 2])
+ self.assertEqual(hsh["key"], "value")
+
+ self.assertEqual(df["string"].tolist(), ["a", "b", "c", "d", "e"])
+ self.assertEqual(df["number"].tolist(), [1.1, 2.2, 3.3, 4.4, 5.5])
+ self.assertEqual(df["integer"].tolist(), [1, 2, 3, 4, 5])
+ self.assertEqual(df["complex"].tolist(), [1 + 2j, 2 + 3j, 3 + 4j, 4 + 5j, 5 + 6j])
+ self.assertEqual(df["boolean"].tolist(), [True, False, True, False, True])
+ self.assertEqual(df["list"].tolist(), [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]])
+ tbl = df["table"].tolist()
+ for i in range(len(tbl)):
+ lst, hsh = tbl[i]
+ self.assertEqual(lst, [i*3 + 1, i*3 + 2])
+ self.assertEqual(hsh[str((i+1) * 3)], (i+1) * 3)
+ self.assertEqual(
+ df["range"].tolist(), 
+ [range(1, 12), range(2, 13), range(3, 14), range(4, 15), range(5, 16)]
+ )
+
+ def testTfsDataFrame(self):
+ self.generalDataFrame("headers", tfs.TfsDataFrame)
+
+ def testPandasDataFrame(self):
+ sys.modules["tfs"] = None #Remove tfs-pandas
+ self.generalDataFrame("attrs", pandas.DataFrame)
+ del sys.modules["tfs"]
+
+ def testFailure(self):
+ with MAD() as mad:
+ mad.send("""
+test = mtable{"string", "number"} + {"a", 1.1} + {"b", 2.2}
+ """)
+ pandas = sys.modules["pandas"]
+ sys.modules["pandas"] = None
+ self.assertRaises(ImportError, lambda: mad.test.to_df())
+ sys.modules["pandas"] = pandas
+ df = mad.test.to_df()
+ self.assertTrue(isinstance(df, tfs.TfsDataFrame))
+ self.assertEqual(df["string"].tolist(), ["a", "b"])
+ self.assertEqual(df["number"].tolist(), [1.1, 2.2])
 
 class TestSpeed(unittest.TestCase):