Skip to content

Commit

Permalink
Merge pull request #7 from MethodicalAcceleratorDesign/pandas
Browse files Browse the repository at this point in the history
Add Conversion to Dataframes
  • Loading branch information
jgray-19 committed Oct 18, 2023
2 parents ea118d0 + 390fc13 commit f2ebac4
Show file tree
Hide file tree
Showing 11 changed files with 218 additions and 40 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
0.4.2 (2023/10/18)

Add `to_df` method to objects, allowing for easy conversion to pandas dataframes. \

0.4.1 (2023/08/19)

Change the way `send_vars` and `recv_vars` work, they now use kwargs and args respectively. \
Expand Down
12 changes: 12 additions & 0 deletions docs/source/dataframes.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Converting TFS tables to Pandas DataFrames
------------------------------------------

`pandas` is an optional dependency of PyMAD-NG. When it is installed, the ``to_df`` method converts TFS tables (called ``mtable`` in MAD-NG) to a `pandas` ``DataFrame``, or to a ``TfsDataFrame`` if you also have `tfs-pandas` installed. In the example below, we generate two ``mtable`` objects by running a survey and a twiss on the Proton Synchrotron lattice, and then convert them to ``DataFrame`` (or ``TfsDataFrame``) objects.

.. literalinclude:: ../../examples/ex-ps-twiss/ps-twiss.py
:lines: 18, 24, 41-49
:linenos:

In this script, we create the variables ``srv`` and ``mtbl``, which are ``mtable`` objects created by ``survey`` and ``twiss`` respectively. First, we convert ``mtbl`` to a ``DataFrame`` and print it. We then check whether `tfs-pandas` is installed: if it is not, the header of the TFS table is stored in the ``attrs`` attribute of the ``DataFrame`` and has to be printed explicitly, whereas with `tfs-pandas` the header is printed automatically. Finally, we convert ``srv`` to a ``DataFrame`` and print it.

Note: If your object is not an ``mtable`` then this function will raise a ``TypeError``, but it is available to call on all ``object`` types in MAD-NG.
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Welcome to the documentation for PyMAD-NG!
ex-managing-refs
ex-fodo
ex-lhc-couplingLocal
dataframes
ex-recv-lhc
examples

Expand Down
14 changes: 11 additions & 3 deletions docs/source/modules.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
API Reference
=============

.. toctree::
:maxdepth: 4
PyMAD-NG Module contents
------------------------

pymadng
.. automodule:: pymadng
:members:
:undoc-members:
:show-inheritance:

Useful functions for MAD References
-----------------------------------

.. autofunction:: pymadng.madp_classes.madhl_obj.to_df
10 changes: 0 additions & 10 deletions docs/source/pymadng.rst

This file was deleted.

21 changes: 14 additions & 7 deletions examples/ex-ps-twiss/ps-twiss.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import time
import os, time, pandas
from pymadng import MAD

import numpy as np
import matplotlib.pyplot as plt
import os
orginal_dir = os.getcwd()
os.chdir(os.path.dirname(os.path.realpath(__file__)))

Expand Down Expand Up @@ -37,8 +34,18 @@
mad.mtbl.write("'PS_twiss_py.tfs'",
mad.py_strs_to_mad_strs(
["name", "kind", "s", "x", "px", "beta11", "alfa11", "beta22", "alfa22","dx",
"dpx", "mu1", "mu2", "l", "angle", "k0l", "k1l", "k2l", "k3l", "hkick", "vkick"]),
).eval()
#.eval() so tws:write() can be finished before MAD is shutdown
"dpx", "mu1", "mu2", "l", "angle", "k0l", "k1l", "k2l", "k3l", "hkick", "vkick"]
)
)

df = mad.mtbl.to_df()
print(df)
try:
import tfs
except ImportError:
print("tfs-pandas not installed, so the header is stored in attrs instead of headers")
print(df.attrs)

print(mad.srv.to_df())

os.chdir(orginal_dir)
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,7 @@ where = ["src"]

[tool.setuptools.dynamic]
version = {attr = "pymadng.__version__"}

[project.optional-dependencies]
pandas = ["pandas>=1.0,<2.1.0"]
tfs = ["tfs-pandas>3.0.0"]
2 changes: 1 addition & 1 deletion src/pymadng/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .madp_object import MAD

__title__ = "pymadng"
__version__ = "0.4.1"
__version__ = "0.4.2"

__summary__ = "Python interface to MAD-NG running as subprocess"
__uri__ = "https://github.com/MethodicalAcceleratorDesign/MADpy"
Expand Down
106 changes: 90 additions & 16 deletions src/pymadng/madp_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,12 @@ def __dir__(self) -> Iterable[str]:
name = self._name
if name[:5] == "_last":
name = name + ".__metatable or " + name
script = f"""
self._mad.psend(f"""
local modList={{}}; local i = 1;
for modname, mod in pairs({name}) do modList[i] = modname; i = i + 1; end
{self._mad.py_name}:send(modList)
"""
self._mad.psend(script)
varnames = [
x for x in self._mad.recv() if isinstance(x, str) and x[0] != "_"
]
return varnames
""")
return [x for x in self._mad.recv() if isinstance(x, str) and x[0] != "_"]


class madhl_obj(madhl_ref):
Expand All @@ -108,9 +104,7 @@ def __dir__(self) -> Iterable[str]:
varnames = self._mad.precv(f"{self._name}:get_varkeys(MAD.object, false)")

if not self._mad.ipython_use_jedi:
varnames.extend(
[x + "()" for x in self._mad.recv() if not x in varnames]
)
varnames.extend([x + "()" for x in self._mad.recv() if not x in varnames])
return varnames

def __call__(self, *args, **kwargs):
Expand All @@ -136,16 +130,96 @@ def __next__(self):
except IndexError:
raise StopIteration

def to_df(self, columns: list = None):
    """Convert this MAD-NG ``mtable`` into a pandas (or tfs-pandas) dataframe.

    The table is transferred from MAD-NG column by column, followed by its
    header. If ``tfs-pandas`` can be imported, a ``tfs.TfsDataFrame`` is
    returned and the header is stored in its ``headers`` attribute; otherwise
    a plain ``pandas.DataFrame`` is returned and the header is stored in its
    ``attrs`` attribute.

    Args:
        columns (list, optional): Names of the columns to keep in the returned
            dataframe. Defaults to None, which keeps every column. Every
            column is still transferred from MAD-NG; the selection is applied
            afterwards.

    Returns:
        pandas.DataFrame or tfs.TfsDataFrame: The dataframe containing the
        table's data, with the table's header attached.

    Raises:
        TypeError: If the referenced object is not an ``mtable``.
        ImportError: If pandas is not installed.
    """
    if not self._mad.precv(f"MAD.typeid.is_mtable({self._name})"):
        raise TypeError("Object is not a table, cannot convert to dataframe")

    # Imported lazily so that pandas and tfs-pandas stay optional dependencies
    import pandas as pd

    try:
        import tfs

        DataFrame, header = tfs.TfsDataFrame, "headers"
    except ImportError:
        DataFrame, header = pd.DataFrame, "attrs"

    py_name, obj_name = self._mad.py_name, self._name
    # Sending every value individually is slow (sending vectors is fast).
    # Everything in the script is declared ``local`` so that no helper
    # variable leaks into the user's MAD-NG global environment.
    self._mad.send(
        f"""
        -- Get the column names
        local colnames = {obj_name}:colnames()
        {py_name}:send(colnames)

        -- Loop through all the column names and send them with their data
        for i, colname in ipairs(colnames) do
          local col = {obj_name}:getcol(colname)

          -- If the column is not a vector and has a metatable, then convert it to a table (reference or generator columns)
          if not MAD.typeid.is_vector(col) and getmetatable(col) then
            local tbl = table.new(#col, 0)
            local conv_to_vec = true
            for j, val in ipairs(col) do
              tbl[j] = val
              -- From testing, checking if I can convert to a vector is faster than sending the table
              conv_to_vec = conv_to_vec and MAD.typeid.is_number(val)
            end
            col = conv_to_vec and MAD.vector(tbl) or tbl
          end

          -- Send the column data
          {py_name}:send(col)
        end

        -- Get the header names and send the list of names
        local header = {obj_name}.header
        {py_name}:send(header)

        -- Loop through all the header names and send their values
        for i, attr in ipairs(header) do
          {py_name}:send({obj_name}[attr])
        end
        """
    )

    # Receive in exactly the order the script above sends: names, then one
    # message per column. The string passed to recv is the MAD-NG name to use
    # in case references are within the table.
    colnames = self._mad.recv()
    full_tbl = {
        col: self._mad.recv(f"{obj_name}:getcol('{col}')") for col in colnames
    }

    # MAD-NG always sends vectors as 2D arrays, but dataframe columns must be
    # 1D. ravel() is used rather than squeeze() because squeeze() turns the
    # (1, 1) array of a single-row table into a 0-d scalar array.
    for key, val in full_tbl.items():
        if isinstance(val, np.ndarray):
            full_tbl[key] = val.ravel()
    df = DataFrame(full_tbl)

    if columns:
        df = df[columns]  # Only keep the columns specified

    # Receive the header (names first, then one value per name) and attach it
    hnams = self._mad.recv()
    setattr(
        df,
        header,
        {hnam: self._mad.recv(f"{obj_name}['{hnam}']") for hnam in hnams},
    )
    return df


class madhl_fun(madhl_ref):
# ----------------------------------Calling/Creating functions--------------------------------------#
def __call_func(self, funcName: str, *args):
"""Call the function funcName and store the result in ``_last``."""
rtrn_ref = madhl_reflast(self._mad)
args_string, vars_to_send = get_args_string(self._mad.py_name, *args)
self._mad.send(
f"{rtrn_ref._name} = __mklast__({funcName}({args_string}))\n"
)
self._mad.send(f"{rtrn_ref._name} = __mklast__({funcName}({args_string}))\n")
for var in vars_to_send:
self._mad.send(var)
return rtrn_ref
Expand Down Expand Up @@ -181,9 +255,9 @@ class madhl_last: # The init and del for a _last object
def __init__(self, mad_proc: mad_process):
self._mad = mad_proc
self._lst_cntr = mad_proc.lst_cntr
self._lastnum = mad_proc.lst_cntr.get()
self._name = f"_last[{self._lastnum}]"
self._parent = "_last"
self._lastnum = mad_proc.lst_cntr.get()
self._name = f"_last[{self._lastnum}]"
self._parent = "_last"

def __del__(self):
self._lst_cntr.set(self._lastnum)
Expand Down
4 changes: 2 additions & 2 deletions src/pymadng/madp_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
# TODO: Make it so that MAD does the loop for variables not python (speed)
# TODO: Review recv_and exec:
"""
Default arguments are evaluated once at module load time.
This may cause problems if the argument is a mutable object such as a list or a dictionary.
Default arguments are evaluated once at module load time.
This may cause problems if the argument is a mutable object such as a list or a dictionary.
If the function modifies the object (e.g., by appending an item to a list), the default value is modified.
Source: https://google.github.io/styleguide/pyguide.html
"""
Expand Down
80 changes: 79 additions & 1 deletion tests/obj_tests.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import unittest, os, time
import unittest, os, time, sys, tfs, pandas

from pymadng import MAD
from pymadng.madp_classes import madhl_ref, madhl_obj, madhl_fun
Expand Down Expand Up @@ -299,6 +299,84 @@ def test_dir(self):
self.assertEqual(dir(mad.quadrupole(knl=[0, 0.3], l = 1)), quad_exp) #Dir of instance of class should be the same as the class
self.assertEqual(dir(mad.quadrupole(asd = 10, qwe = 20)), sorted(quad_exp + ["asd", "qwe"])) #Adding to the instance should change the dir

class TestDataFrame(unittest.TestCase):
    """Tests for converting an ``mtable`` to a dataframe with ``to_df``."""

    def generalDataFrame(self, headers, DataFrame):
        """Build a table with many column types and check its conversion.

        Args:
            headers (str): Name of the dataframe attribute expected to hold
                the table header ("headers" or "attrs").
            DataFrame (type): Class the converted dataframe is expected to be
                an instance of.
        """
        # The context manager (also used in testFailure) makes sure the MAD
        # process is closed even if the conversion raises.
        with MAD() as mad:
            mad.send("""
            test = mtable{
              {"string"}, "number", "integer", "complex", "boolean", "list", "table", "range",! "generator",
              name     = "test",
              header   = {"string", "number", "integer", "complex", "boolean", "list", "table", "range"},
              string   = "string",
              number   = 1.234567890,
              integer  = 12345670,
              complex  = 1.3 + 1.2i,
              boolean  = true,
              list     = {1, 2, 3, 4, 5},
              table    = {1, 2, ["key"] = "value"},
              range    = 1..11,
            }
            + {"a", 1.1, 1, 1 + 2i, true , {1, 2 }, {1 , 2 , ["3" ] = 3 }, 1..11,}
            + {"b", 2.2, 2, 2 + 3i, false, {3, 4 }, {4 , 5 , ["6" ] = 6 }, 2..12,}
            + {"c", 3.3, 3, 3 + 4i, true , {5, 6 }, {7 , 8 , ["9" ] = 9 }, 3..13,}
            + {"d", 4.4, 4, 4 + 5i, false, {7, 8 }, {10, 11, ["12"] = 12}, 4..14,}
            + {"e", 5.5, 5, 5 + 6i, true , {9, 10}, {13, 14, ["15"] = 15}, 5..15,}
            test:addcol("generator", \\ri, m -> m:getcol("number")[ri] + 1i * m:getcol("number")[ri])
            test:write("test")
            """
            )
            df = mad.test.to_df()

        # Type of the result and contents of the header
        self.assertTrue(isinstance(df, DataFrame))
        header = getattr(df, headers)
        self.assertEqual(header["name"], "test")
        self.assertEqual(header["string"], "string")
        self.assertEqual(header["number"], 1.234567890)
        self.assertEqual(header["integer"], 12345670)
        self.assertEqual(header["complex"], 1.3 + 1.2j)
        self.assertEqual(header["boolean"], True)
        self.assertEqual(header["list"], [1, 2, 3, 4, 5])
        lst, hsh = header["table"]
        self.assertEqual(lst, [1, 2])
        self.assertEqual(hsh["key"], "value")

        # Contents of every column
        self.assertEqual(df["string"].tolist(), ["a", "b", "c", "d", "e"])
        self.assertEqual(df["number"].tolist(), [1.1, 2.2, 3.3, 4.4, 5.5])
        self.assertEqual(df["integer"].tolist(), [1, 2, 3, 4, 5])
        self.assertEqual(df["complex"].tolist(), [1 + 2j, 2 + 3j, 3 + 4j, 4 + 5j, 5 + 6j])
        self.assertEqual(df["boolean"].tolist(), [True, False, True, False, True])
        self.assertEqual(df["list"].tolist(), [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]])
        for i, (lst, hsh) in enumerate(df["table"].tolist()):
            self.assertEqual(lst, [i*3 + 1, i*3 + 2])
            self.assertEqual(hsh[str((i+1) * 3)], (i+1) * 3)
        self.assertEqual(
            df["range"].tolist(),
            [range(1, 12), range(2, 13), range(3, 14), range(4, 15), range(5, 16)]
        )

    def testTfsDataFrame(self):
        """With tfs-pandas importable, a TfsDataFrame with ``headers`` is expected."""
        self.generalDataFrame("headers", tfs.TfsDataFrame)

    def testPandasDataFrame(self):
        """With tfs-pandas hidden, a pandas DataFrame with ``attrs`` is expected."""
        real_tfs = sys.modules.get("tfs")
        sys.modules["tfs"] = None  # A None entry makes ``import tfs`` raise ImportError
        try:
            self.generalDataFrame("attrs", pandas.DataFrame)
        finally:
            # Always undo the patch, so a failure here cannot break later tests
            if real_tfs is None:
                del sys.modules["tfs"]
            else:
                sys.modules["tfs"] = real_tfs

    def testFailure(self):
        """``to_df`` raises ImportError without pandas and works again once it is back."""
        with MAD() as mad:
            mad.send("""
            test = mtable{"string", "number"} + {"a", 1.1} + {"b", 2.2}
            """)
            real_pandas = sys.modules["pandas"]
            sys.modules["pandas"] = None  # A None entry makes ``import pandas`` raise ImportError
            try:
                with self.assertRaises(ImportError):
                    mad.test.to_df()
            finally:
                # Always undo the patch, so a failure here cannot break later tests
                sys.modules["pandas"] = real_pandas
            df = mad.test.to_df()
            self.assertTrue(isinstance(df, tfs.TfsDataFrame))
            self.assertEqual(df["string"].tolist(), ["a", "b"])
            self.assertEqual(df["number"].tolist(), [1.1, 2.2])

class TestSpeed(unittest.TestCase):

Expand Down

0 comments on commit f2ebac4

Please sign in to comment.