Merge pull request #141 from jacanchaplais/feature/edge-relabel-140
Relabel edges from in / out to src / dst #140
jacanchaplais authored Aug 16, 2023
2 parents 3f29ace + 46064b5 commit b6ddacd
Showing 6 changed files with 49 additions and 201 deletions.
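For downstream users, the visible effect of this change is on the structured field names of edge arrays. A minimal sketch of the rename, assuming ``AdjacencyList`` accepts an ``(n, 2)`` integer array, per its docstring further down in this diff:

import numpy as np
import graphicle as gcl

# COO edge list: one (source, destination) vertex pair per row.
coo = np.array([[-1, -2], [-2, -3], [-2, -4]], dtype="<i4")
adj = gcl.AdjacencyList(coo)

# Fields were previously named "in" / "out"; from this change
# onwards they are "src" / "dst".
print(adj.edges["src"])  # [-1 -2 -2]
print(adj.edges["dst"])  # [-2 -3 -4]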
2 changes: 1 addition & 1 deletion .github/workflows/publish-to-pypi.yml
@@ -8,7 +8,7 @@ on:
jobs:
  build-n-publish:
    name: Build and publish to PyPI
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v3
      with: { fetch-depth: 0 }
2 changes: 1 addition & 1 deletion graphicle/base.py
@@ -67,7 +67,7 @@ class EventInterface(ty.Protocol):
        Status codes annotating particles describing their generation.
    final : ndarray[bool_]
        Mask identifying final particles in the event's ancestry.
-    edges : ndarray[int32], fields ("in", "out")
+    edges : ndarray[int32], fields ("src", "dst")
        Ancestry of particles in event, encoded as a COO list of
        integers, describing a graph structure.
    """
82 changes: 2 additions & 80 deletions graphicle/calculate.py
@@ -14,13 +14,10 @@
import warnings
from functools import lru_cache, partial

-import deprecation
import networkx as nx
import numba as nb
import numpy as np
import numpy.lib.recfunctions as rfn
-import pyjet
-from pyjet import ClusterSequence, PseudoJet

from . import base
@@ -33,7 +30,6 @@
"rapidity_centre",
"combined_mass",
"flow_trace",
"cluster_pmu",
"aggregate_momenta",
]

@@ -280,9 +276,9 @@ def flow_trace(
        list(target), blacklist=False, sign_sensitive=True
    )
    hard_graph = hard_graph[target_mask]
-    names, vtxs = tuple(hard_graph.pdg.name), tuple(hard_graph.edges["out"])
+    names, vtxs = tuple(hard_graph.pdg.name), tuple(hard_graph.edges["dst"])
    # out vertices of user specified particles
-    focus_pcls = graph.edges[mask]["out"]
+    focus_pcls = graph.edges[mask]["dst"]
    trc = np.array(
        [
            _trace_vector(
@@ -304,80 +300,6 @@
    return traces


-@deprecation.deprecated(
-    deprecated_in="0.2.3",
-    removed_in="0.3.0",
-    details="Use ``graphicle.select.fastjet_clusters()`` instead.",
-)
-def cluster_pmu(
-    pmu: "MomentumArray",
-    radius: float,
-    p_val: float,
-    pt_cut: ty.Optional[float] = None,
-    eta_cut: ty.Optional[float] = None,
-) -> "MaskGroup":
-    """Clusters particles using the generalised-kt algorithm.
-
-    :group: calculate
-
-    .. versionadded:: 0.1.4
-
-    Parameters
-    ----------
-    pmu : MomentumArray
-        The momenta of each particle in the point cloud.
-    radius : float
-        The radius of the clusters to be produced.
-    p_val : float
-        The exponent parameter determining the transverse momentum (pt)
-        dependence of iterative pseudojet merges. Positive values
-        cluster low pt particles first, negative values cluster high pt
-        particles first, and a value of zero corresponds to no pt
-        dependence.
-    pt_cut : float, optional
-        Jet transverse momentum threshold, below which jets will be
-        discarded.
-    eta_cut : float, optional
-        Jet pseudorapidity threshold, above which jets will be
-        discarded.
-
-    Returns
-    -------
-    clusters : MaskGroup
-        MaskGroup object, containing boolean masks over the input data
-        for each jet clustering.
-
-    Notes
-    -----
-    This is a wrapper around FastJet's implementation.
-    ``p_val`` set to ``-1`` gives anti-kT, ``0`` gives Cambridge-Aachen,
-    and ``1`` gives kT clusterings.
-    """
-    from graphicle.data import MaskGroup
-
-    pmu_pyjet = pmu.data[["e", "x", "y", "z"]]
-    pmu_pyjet.dtype.names = "E", "px", "py", "pz"
-    pmu_pyjet_idx = rfn.append_fields(
-        pmu_pyjet, "idx", np.arange(len(pmu_pyjet))
-    )
-    sequence: ClusterSequence = pyjet.cluster(
-        pmu_pyjet_idx, R=radius, p=p_val, ep=True
-    )
-    jets: ty.Iterable[PseudoJet] = sequence.inclusive_jets()
-    if pt_cut is not None:
-        jets = filter(lambda jet: jet.pt > pt_cut, jets)
-    if eta_cut is not None:
-        jets = filter(lambda jet: abs(jet.eta) < eta_cut, jets)
-    cluster_mask = MaskGroup()
-    cluster_mask.agg_op = "or"
-    for i, jet in enumerate(jets):
-        mask = np.zeros_like(pmu_pyjet, dtype="<?")
-        mask[list(map(lambda pcl: pcl.idx, jet))] = True  # type: ignore
-        cluster_mask[f"{i}"] = mask
-    return cluster_mask
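The deprecation notice points to ``graphicle.select.fastjet_clusters()`` as the replacement; a hedged migration sketch, assuming the new function mirrors ``cluster_pmu``'s parameter names (not verified from this diff):

import graphicle as gcl

# Hypothetical like-for-like replacement of the removed wrapper:
# anti-kt (p = -1) with R = 0.4, keeping jets with pt above 30 and
# |eta| below 2.5. Parameter names are assumed, not confirmed here.
clusters = gcl.select.fastjet_clusters(
    pmu,            # MomentumArray, as for cluster_pmu()
    radius=0.4,
    p_val=-1.0,
    pt_cut=30.0,
    eta_cut=2.5,
)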


@nb.vectorize([nb.float64(nb.float64, nb.float64)])
def _root_diff_two_squares(
    x1: base.DoubleUfunc, x2: base.DoubleUfunc
140 changes: 33 additions & 107 deletions graphicle/data.py
@@ -49,7 +49,6 @@
from copy import deepcopy
from enum import Enum

-import deprecation
import numpy as np
import numpy.typing as npt
from attr import Factory, cmp_using, define, field, setters
@@ -94,7 +93,9 @@
def _map_invert(mapping: ty.Dict[str, ty.Set[str]]) -> ty.Dict[str, str]:
    return dict(
        it.chain.from_iterable(
-            map(it.product, mapping.values(), mapping.keys())
+            it.starmap(
+                lambda key, val: zip(val, it.repeat(key)), mapping.items()
+            )
        )
    )
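This change is more than stylistic: ``it.product`` iterates a string key character by character, which happened to work for the single-character momentum keys but silently breaks multi-character keys like ``"src"``. A standalone demonstration:

import itertools as it

mapping = {"src": {"in", "src"}}

# Old construction: it.product(aliases, "src") pairs each alias with
# the characters "s", "r", "c", so every alias ends up mapped to the
# key's *last character*: {'in': 'c', 'src': 'c'} (order may vary).
old = dict(
    it.chain.from_iterable(
        map(it.product, mapping.values(), mapping.keys())
    )
)

# New construction: zip each alias with the whole key string.
new = dict(
    it.chain.from_iterable(
        it.starmap(lambda key, val: zip(val, it.repeat(key)), mapping.items())
    )
)
# new == {'in': 'src', 'src': 'src'}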

@@ -104,12 +105,12 @@ def _map_invert(mapping: ty.Dict[str, ty.Set[str]]) -> ty.Dict[str, str]:
"x": {"x", "px"},
"y": {"y", "py"},
"z": {"z", "pz"},
"e": {"e", "pe", "tau", "t"},
"e": {"e", "pe"},
}
)
_MOMENTUM_ORDER = tuple("xyze")
_EDGE_MAP = _map_invert({"in": {"in", "src"}, "out": {"out", "dst"}})
_EDGE_ORDER = ("in", "out")
_EDGE_MAP = _map_invert({"src": {"in", "src"}, "dst": {"out", "dst"}})
_EDGE_ORDER = ("src", "dst")
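With the inverted map, both the legacy and the new spellings resolve to the canonical field names, so structured input in either convention can be normalised. The call above produces the equivalent of:

# Equivalent literal of the _map_invert result above.
_EDGE_MAP = {"in": "src", "src": "src", "out": "dst", "dst": "dst"}
_EDGE_ORDER = ("src", "dst")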


class MomentumElement(ty.NamedTuple):
@@ -297,6 +298,20 @@ def _reorder_pmu(array: base.VoidVector) -> base.VoidVector:
    return array[name_reorder]


+def _reorder_edges(array: base.VoidVector) -> base.VoidVector:
+    """Ensures passed structured arrays of COO edges have their fields
+    mapped in the same order as graphicle's ``AdjacencyList`` underlying
+    convention.
+    """
+    names = array.dtype.names
+    assert names is not None
+    if names == _EDGE_ORDER:
+        return array
+    gcl_to_ext = dict(zip(map(_EDGE_MAP.__getitem__, names), names))
+    name_reorder = list(map(gcl_to_ext.__getitem__, _EDGE_ORDER))
+    return array[name_reorder]
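A quick sketch of the new helper in action, on a structured array whose fields arrive reversed and in the legacy spelling:

import numpy as np

# Fields in the "wrong" order, using the legacy names.
arr = np.array([(2, 1), (3, 2)], dtype=[("out", "<i4"), ("in", "<i4")])

out = _reorder_edges(arr)
# _EDGE_MAP resolves "out" -> "dst" and "in" -> "src", so the view's
# columns are reindexed into _EDGE_ORDER: source first, then
# destination. The converter below strips the field names anyway.
print(out)  # [(1, 2) (2, 3)]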


def _row_contiguous(func: ty.Callable[..., base.AnyVector]):
    """Decorator to ensure that numpy arrays returned from functions are
    row-contiguous.
@@ -349,6 +364,8 @@ def converter(values: npt.ArrayLike) -> base.AnyVector:
        assert names is not None
        if num_cols == 4:
            values = _reorder_pmu(values)
+        elif (num_cols == 2) and (set(_EDGE_MAP.keys()) & set(names)):
+            values = _reorder_edges(values)
        values = rfn.structured_to_unstructured(values)
    array = np.asarray(values, dtype=dtype)
    shape = array.shape
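Combined with the converter dispatch above, structured edge input in either spelling or field order now normalises on construction; a sketch, assuming ``AdjacencyList`` routes its ``data`` argument through this converter:

import numpy as np
import graphicle as gcl

legacy = np.array([(1, -2)], dtype=[("out", "<i4"), ("in", "<i4")])
adj = gcl.AdjacencyList(legacy)

# Reordered on the way in: "in" becomes the "src" column.
assert adj.edges["src"][0] == -2
assert adj.edges["dst"][0] == 1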
@@ -762,18 +779,6 @@ def copy(self) -> "MaskGroup[MaskGeneric]":
            agg_op=self._agg_op,  # type: ignore
        )

-    @property
-    @deprecation.deprecated(
-        deprecated_in="0.2.6",
-        removed_in="0.3.0",
-        details="Use ``MaskGroup.keys()`` instead.",
-    )
-    def names(self) -> ty.List[str]:
-        """Provides the string values of the keys to the top-level
-        nested ``MaskBase`` objects as a list.
-        """
-        return list(self._mask_arrays.keys())

    def bitwise_or(self) -> MaskArray:
        return MaskArray(
            np.bitwise_or.reduce(  # type: ignore
@@ -805,16 +810,6 @@ def data(self) -> base.BoolVector:
"Please contact developers with a bug report."
)

@property
@deprecation.deprecated(
deprecated_in="0.2.6",
removed_in="0.3.0",
details="Use ``MaskGroup.to_dict()`` instead.",
)
def dict(self) -> ty.Dict[str, base.BoolVector]:
"""Masks nested in a dictionary instead of a ``MaskGroup``."""
return {key: val.data for key, val in self._mask_arrays.items()}

def to_dict(self) -> ty.Dict[str, base.BoolVector]:
"""Masks nested in a dictionary instead of a ``MaskGroup``."""
return {key: val.data for key, val in self._mask_arrays.items()}
@@ -2038,11 +2033,14 @@ class AdjacencyList(base.AdjacencyBase):
    .. versionchanged:: 0.2.4
        Added internal numpy interfaces for greater interoperability.

+    .. versionchanged:: 0.3.0
+        Renamed "in" / "out" fields to "src" / "dst".

    Parameters
    ----------
    data : ndarray[int32] or ndarray[void]
        COO formatted edge pairs, either given as a (n, 2)-dimensional
-        array, or a structured array with field names ``('in', 'out')``.
+        array, or a structured array with field names ``('src', 'dst')``.
    weights : np.ndarray[float64]
        Weights attributed to each edge in the COO list.
@@ -2058,7 +2056,7 @@
    _HANDLED_TYPES: ty.Tuple[ty.Type, ...] = field(init=False, repr=False)

    def __attrs_post_init__(self):
-        self.dtype = np.dtype(list(zip(("in", "out"), ("<i4",) * 2)))
+        self.dtype = np.dtype(list(zip(("src", "dst"), ("<i4",) * 2)))
        self._HANDLED_TYPES = (np.ndarray, nm.Number, cla.Sequence)

    @property
@@ -2144,7 +2142,7 @@ def data(self) -> base.VoidVector:

    @property
    def edges(self) -> base.VoidVector:
-        """COO edge list, with field names ``('in', 'out')``."""
+        """COO edge list, with field names ``('src', 'dst')``."""
        return self.data

    @property
@@ -2263,90 +2261,18 @@ def to_sparse(self, data: ty.Optional[base.AnyVector] = None) -> coo_array:
        Returns
        -------
        coo_array
-            COO-formatted sparse array, where rows are ``"in"`` and cols
-            are ``"out"`` indices for ``AdjacencyList.edges``.
+            COO-formatted sparse array, where rows are ``"src"`` and cols
+            are ``"dst"`` indices for ``AdjacencyList.edges``.
        """
        abs_edges = self._edge_relabel
        size = np.max(abs_edges) + 1
        if data is None:
-            data = np.sign(self.data["out"]) == 1
+            data = np.sign(self.data["dst"]) == 1
        return coo_array(
            (data, (abs_edges[:, 0], abs_edges[:, 1])),
            shape=(size, size),
        )
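A hedged usage sketch of ``to_sparse()`` after the rename, assuming ``_edge_relabel`` maps vertex ids onto a dense zero-based range:

import numpy as np
import graphicle as gcl

adj = gcl.AdjacencyList(np.array([[-1, -2], [-2, -3]], dtype="<i4"))
sparse = adj.to_sparse()

# Rows index "src" vertices, columns index "dst" vertices. With no
# `data` given, entries flag edges whose "dst" id is positive.
print(sparse.shape)  # (3, 3) under the dense-relabelling assumption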

-    @deprecation.deprecated(
-        deprecated_in="0.2.6",
-        removed_in="0.3.0",
-        details="Inconsistent behaviour, too niche to maintain. Use "
-        "other interfaces, such as ``AdjacencyList.to_sparse()`` "
-        "or ``AdjacencyList.matrix``, instead.",
-    )
-    def to_dicts(
-        self,
-        edge_data: ty.Optional[
-            ty.Dict[str, ty.Union[base.ArrayBase, base.AnyVector]]
-        ] = None,
-        node_data: ty.Optional[
-            ty.Dict[str, ty.Union[base.ArrayBase, base.AnyVector]]
-        ] = None,
-    ) -> AdjDict:
-        """Returns data in dictionary format, which is more easily
-        parsed by external libraries, such as ``NetworkX``.
-
-        Parameters
-        ----------
-        edge_data : dict[str, ArrayBase | ndarray[any]], optional
-            Values to encode on each edge.
-        node_data : dict[str, ArrayBase | ndarray[any]], optional
-            Values to encode on each node.
-
-        Returns
-        -------
-        AdjDict
-            Dictionary with ``"edges"`` and ``"nodes"`` keys, which
-            refer to a tuple of tuples, representing the edges or nodes
-            of the graph. Associated data is encoded in the final
-            element of each of the innermost tuples, which is a
-            dictionary providing labels and values for the data.
-        """
-        if edge_data is None:
-            edge_data = dict()
-        if node_data is None:
-            node_data = dict()
-
-        def make_data_dicts(
-            orig: ty.Tuple[ty.Any, ...],
-            data: ty.Dict[str, ty.Union[base.ArrayBase, base.AnyVector]],
-        ):
-            def array_iterator(array_dict):
-                for array in array_dict.values():
-                    if isinstance(array, base.ArrayBase):
-                        yield array.data
-                    elif isinstance(array, np.ndarray):
-                        yield array
-                    else:
-                        raise TypeError("Data structure not supported.")
-
-            data_rows = zip(*array_iterator(data))
-            dicts = (dict(zip(data.keys(), row)) for row in data_rows)
-            combo = it.zip_longest(*orig, dicts, fillvalue=dict())
-            return tuple(combo)  # type: ignore
-
-        # form edges with data for easier ancestry tracking
-        edges = make_data_dicts(
-            orig=(self.edges["in"], self.edges["out"]),
-            data=edge_data,
-        )
-        nodes = make_data_dicts(
-            orig=(self.nodes,),
-            data=node_data,
-        )
-        return dict(
-            edges=edges,
-            nodes=nodes,
-        )


@define
class Graphicle:
@@ -2460,7 +2386,7 @@ def from_numpy(
            COO formatted pairs of vertex ids, of shape ``(N, 2)``,
            where ``N`` is the number of particles in the graph.
            Alternatively, supply a structured array with field names
-            ``('in', 'out')``.
+            ``('src', 'dst')``.
        weights : ndarray[Number], optional
            Weights to be associated with each edge in the COO edge
            list, provided in the same order.
@@ -2530,7 +2456,7 @@ def final(self) -> MaskArray:

    @property
    def edges(self) -> base.VoidVector:
-        """COO edge list, with field names ``('in', 'out')``."""
+        """COO edge list, with field names ``('src', 'dst')``."""
        return self.adj.edges

    @property
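Finally, the rename surfaces identically on ``Graphicle`` itself; a sketch, assuming ``from_numpy`` accepts ``edges`` alone with the other particle arrays left unset:

import numpy as np
import graphicle as gcl

graph = gcl.Graphicle.from_numpy(
    edges=np.array([[-1, -2], [-2, -3]], dtype="<i4"),
)
print(graph.edges["src"])  # [-1 -2]
print(graph.edges["dst"])  # [-2 -3]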
