diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml
index 0b54573..87179db 100644
--- a/.github/workflows/publish-to-pypi.yml
+++ b/.github/workflows/publish-to-pypi.yml
@@ -8,7 +8,7 @@ on:
 jobs:
   build-n-publish:
     name: Build and publish to PyPI
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
         with: { fetch-depth: 0 }
diff --git a/graphicle/base.py b/graphicle/base.py
index 7808a27..93a80c4 100644
--- a/graphicle/base.py
+++ b/graphicle/base.py
@@ -67,7 +67,7 @@ class EventInterface(ty.Protocol):
         Status codes annotating particles describing their generation.
     final : ndarray[bool_]
         Mask identifying final particles in the event's ancestry.
-    edges : ndarray[int32], fields ("in", "out")
+    edges : ndarray[int32], fields ("src", "dst")
         Ancestry of particles in event, encoded as a COO list of
         integers, describing a graph structure.
     """
diff --git a/graphicle/calculate.py b/graphicle/calculate.py
index b7043de..af490d3 100644
--- a/graphicle/calculate.py
+++ b/graphicle/calculate.py
@@ -14,13 +14,10 @@
 import warnings
 from functools import lru_cache, partial
 
-import deprecation
 import networkx as nx
 import numba as nb
 import numpy as np
 import numpy.lib.recfunctions as rfn
-import pyjet
-from pyjet import ClusterSequence, PseudoJet
 
 from . import base
@@ -33,7 +30,6 @@
     "rapidity_centre",
     "combined_mass",
     "flow_trace",
-    "cluster_pmu",
     "aggregate_momenta",
 ]
@@ -280,9 +276,9 @@ def flow_trace(
         list(target), blacklist=False, sign_sensitive=True
     )
     hard_graph = hard_graph[target_mask]
-    names, vtxs = tuple(hard_graph.pdg.name), tuple(hard_graph.edges["out"])
+    names, vtxs = tuple(hard_graph.pdg.name), tuple(hard_graph.edges["dst"])
     # out vertices of user specified particles
-    focus_pcls = graph.edges[mask]["out"]
+    focus_pcls = graph.edges[mask]["dst"]
     trc = np.array(
         [
             _trace_vector(
@@ -304,80 +300,6 @@ def flow_trace(
     return traces
 
 
-@deprecation.deprecated(
-    deprecated_in="0.2.3",
-    removed_in="0.3.0",
-    details="Use ``graphicle.select.fastjet_clusters()`` instead.",
-)
-def cluster_pmu(
-    pmu: "MomentumArray",
-    radius: float,
-    p_val: float,
-    pt_cut: ty.Optional[float] = None,
-    eta_cut: ty.Optional[float] = None,
-) -> "MaskGroup":
-    """Clusters particles using the generalised-kt algorithm.
-
-    :group: calculate
-
-    .. versionadded:: 0.1.4
-
-    Parameters
-    ----------
-    pmu: MomentumArray
-        The momenta of each particle in the point cloud.
-    radius : float
-        The radius of the clusters to be produced.
-    p_val : float
-        The exponent parameter determining the transverse momentum (pt)
-        dependence of iterative pseudojet merges. Positive values
-        cluster low pt particles first, negative values cluster high pt
-        particles first, and a value of zero corresponds to no pt
-        dependence.
-    pt_cut : float, optional
-        Jet transverse momentum threshold, below which jets will be
-        discarded.
-    eta_cut : float, optional
-        Jet pseudorapidity threshold, above which jets will be
-        discarded.
-
-    Returns
-    -------
-    clusters : MaskGroup
-        MaskGroup object, containing boolean masks over the input data
-        for each jet clustering.
-
-    Notes
-    -----
-    This is a wrapper around FastJet's implementation.
-
-    ``p_val`` set to ``-1`` gives anti-kT, ``0`` gives Cambridge-Aachen,
-    and ``1`` gives kT clusterings.
- """ - from graphicle.data import MaskGroup - - pmu_pyjet = pmu.data[["e", "x", "y", "z"]] - pmu_pyjet.dtype.names = "E", "px", "py", "pz" - pmu_pyjet_idx = rfn.append_fields( - pmu_pyjet, "idx", np.arange(len(pmu_pyjet)) - ) - sequence: ClusterSequence = pyjet.cluster( - pmu_pyjet_idx, R=radius, p=p_val, ep=True - ) - jets: ty.Iterable[PseudoJet] = sequence.inclusive_jets() - if pt_cut is not None: - jets = filter(lambda jet: jet.pt > pt_cut, jets) - if eta_cut is not None: - jets = filter(lambda jet: abs(jet.eta) < eta_cut, jets) - cluster_mask = MaskGroup() - cluster_mask.agg_op = "or" - for i, jet in enumerate(jets): - mask = np.zeros_like(pmu_pyjet, dtype=" ty.Dict[str, str]: return dict( it.chain.from_iterable( - map(it.product, mapping.values(), mapping.keys()) + it.starmap( + lambda key, val: zip(val, it.repeat(key)), mapping.items() + ) ) ) @@ -104,12 +105,12 @@ def _map_invert(mapping: ty.Dict[str, ty.Set[str]]) -> ty.Dict[str, str]: "x": {"x", "px"}, "y": {"y", "py"}, "z": {"z", "pz"}, - "e": {"e", "pe", "tau", "t"}, + "e": {"e", "pe"}, } ) _MOMENTUM_ORDER = tuple("xyze") -_EDGE_MAP = _map_invert({"in": {"in", "src"}, "out": {"out", "dst"}}) -_EDGE_ORDER = ("in", "out") +_EDGE_MAP = _map_invert({"src": {"in", "src"}, "dst": {"out", "dst"}}) +_EDGE_ORDER = ("src", "dst") class MomentumElement(ty.NamedTuple): @@ -297,6 +298,20 @@ def _reorder_pmu(array: base.VoidVector) -> base.VoidVector: return array[name_reorder] +def _reorder_edges(array: base.VoidVector) -> base.VoidVector: + """Ensures passed structured arrays of COO edges have their fields + mapped in the same order as graphicle's ``AdjacencyList`` underlying + convention. + """ + names = array.dtype.names + assert names is not None + if names == _EDGE_ORDER: + return array + gcl_to_ext = dict(zip(map(_EDGE_MAP.__getitem__, names), names)) + name_reorder = list(map(gcl_to_ext.__getitem__, _EDGE_ORDER)) + return array[name_reorder] + + def _row_contiguous(func: ty.Callable[..., base.AnyVector]): """Decorator to ensure that numpy arrays returned from functions are row-contiguous. @@ -349,6 +364,8 @@ def converter(values: npt.ArrayLike) -> base.AnyVector: assert names is not None if num_cols == 4: values = _reorder_pmu(values) + elif (num_cols == 2) and (set(_EDGE_MAP.keys()) & set(names)): + values = _reorder_edges(values) values = rfn.structured_to_unstructured(values) array = np.asarray(values, dtype=dtype) shape = array.shape @@ -762,18 +779,6 @@ def copy(self) -> "MaskGroup[MaskGeneric]": agg_op=self._agg_op, # type: ignore ) - @property - @deprecation.deprecated( - deprecated_in="0.2.6", - removed_in="0.3.0", - details="Use ``MaskGroup.keys()`` instead.", - ) - def names(self) -> ty.List[str]: - """Provides the string values of the keys to the top-level - nested ``MaskBase`` objects as a list. - """ - return list(self._mask_arrays.keys()) - def bitwise_or(self) -> MaskArray: return MaskArray( np.bitwise_or.reduce( # type: ignore @@ -805,16 +810,6 @@ def data(self) -> base.BoolVector: "Please contact developers with a bug report." 
             )
 
-    @property
-    @deprecation.deprecated(
-        deprecated_in="0.2.6",
-        removed_in="0.3.0",
-        details="Use ``MaskGroup.to_dict()`` instead.",
-    )
-    def dict(self) -> ty.Dict[str, base.BoolVector]:
-        """Masks nested in a dictionary instead of a ``MaskGroup``."""
-        return {key: val.data for key, val in self._mask_arrays.items()}
-
     def to_dict(self) -> ty.Dict[str, base.BoolVector]:
         """Masks nested in a dictionary instead of a ``MaskGroup``."""
         return {key: val.data for key, val in self._mask_arrays.items()}
@@ -2038,11 +2033,14 @@ class AdjacencyList(base.AdjacencyBase):
     .. versionchanged:: 0.2.4
        Added internal numpy interfaces for greater interoperability.
 
+    .. versionchanged:: 0.3.0
+       Renamed "in" / "out" fields to "src" / "dst".
+
     Parameters
     ----------
     data : ndarray[int32] or ndarray[void]
         COO formatted edge pairs, either given as a (n, 2)-dimensional
-        array, or a structured array with field names ``('in', 'out')``.
+        array, or a structured array with field names ``('src', 'dst')``.
     weights : np.ndarray[float64]
         Weights attributed to each edge in the COO list.
@@ -2058,7 +2056,7 @@ class AdjacencyList(base.AdjacencyBase):
     _HANDLED_TYPES: ty.Tuple[ty.Type, ...] = field(init=False, repr=False)
 
     def __attrs_post_init__(self):
-        self.dtype = np.dtype(list(zip(("in", "out"), ("<i4",) * 2)))
+        self.dtype = np.dtype(list(zip(("src", "dst"), ("<i4",) * 2)))
@@ ... @@ ... -> base.VoidVector:
     @property
     def edges(self) -> base.VoidVector:
-        """COO edge list, with field names ``('in', 'out')``."""
+        """COO edge list, with field names ``('src', 'dst')``."""
         return self.data
 
     @property
@@ -2263,90 +2261,18 @@ def to_sparse(self, data: ty.Optional[base.AnyVector] = None) -> coo_array:
         Returns
         -------
         coo_array
-            COO-formatted sparse array, where rows are ``"in"`` and cols
-            are ``"out"`` indices for ``AdjacencyList.edges``.
+            COO-formatted sparse array, where rows are ``"src"`` and cols
+            are ``"dst"`` indices for ``AdjacencyList.edges``.
         """
         abs_edges = self._edge_relabel
         size = np.max(abs_edges) + 1
         if data is None:
-            data = np.sign(self.data["out"]) == 1
+            data = np.sign(self.data["dst"]) == 1
         return coo_array(
             (data, (abs_edges[:, 0], abs_edges[:, 1])),
             shape=(size, size),
         )
 
-    @deprecation.deprecated(
-        deprecated_in="0.2.6",
-        removed_in="0.3.0",
-        details="Inconsistent behaviour, too niche to maintain. Use "
-        "other interfaces, such as ``AdjacencyList.to_sparse()`` "
-        "or ``AdjacencyList.matrix``, instead.",
-    )
-    def to_dicts(
-        self,
-        edge_data: ty.Optional[
-            ty.Dict[str, ty.Union[base.ArrayBase, base.AnyVector]]
-        ] = None,
-        node_data: ty.Optional[
-            ty.Dict[str, ty.Union[base.ArrayBase, base.AnyVector]]
-        ] = None,
-    ) -> AdjDict:
-        """Returns data in dictionary format, which is more easily
-        parsed by external libraries, such as ``NetworkX``.
-
-        Parameters
-        ----------
-        edge_data : dict[str, ArrayBase | ndarray[any]], optional
-            Values to encode on each edge.
-        node_data : dict[str, ArrayBase | ndarray[any]], optional
-            Values to encode on each node.
-
-        Returns
-        -------
-        AdjDict
-            Dictionary with ``"edges"`` and ``"nodes"`` keys, which
-            refer to a tuple of tuples, representing the edges or nodes
-            of the graph. Associated data is encoded in the final
-            element of each of the innermost tuples, which is a
-            dictionary providing labels and values for the data.
- """ - if edge_data is None: - edge_data = dict() - if node_data is None: - node_data = dict() - - def make_data_dicts( - orig: ty.Tuple[ty.Any, ...], - data: ty.Dict[str, ty.Union[base.ArrayBase, base.AnyVector]], - ): - def array_iterator(array_dict): - for array in array_dict.values(): - if isinstance(array, base.ArrayBase): - yield array.data - elif isinstance(array, np.ndarray): - yield array - else: - raise TypeError("Data structure not supported.") - - data_rows = zip(*array_iterator(data)) - dicts = (dict(zip(data.keys(), row)) for row in data_rows) - combo = it.zip_longest(*orig, dicts, fillvalue=dict()) - return tuple(combo) # type: ignore - - # form edges with data for easier ancestry tracking - edges = make_data_dicts( - orig=(self.edges["in"], self.edges["out"]), - data=edge_data, - ) - nodes = make_data_dicts( - orig=(self.nodes,), - data=node_data, - ) - return dict( - edges=edges, - nodes=nodes, - ) - @define class Graphicle: @@ -2460,7 +2386,7 @@ def from_numpy( COO formatted pairs of vertex ids, of shape ``(N, 2)``, where ``N`` is the number of particles in the graph. Alternatively, supply a structured array with field names - ``('in', 'out')``. + ``('src', 'dst')``. weights : ndarray[Number], optional Weights to be associated with each edge in the COO edge list, provided in the same order. @@ -2530,7 +2456,7 @@ def final(self) -> MaskArray: @property def edges(self) -> base.VoidVector: - """COO edge list, with field names ``('in', 'out')``.""" + """COO edge list, with field names ``('src', 'dst')``.""" return self.adj.edges @property diff --git a/graphicle/select.py b/graphicle/select.py index b751d51..0e8b8f9 100644 --- a/graphicle/select.py +++ b/graphicle/select.py @@ -194,8 +194,8 @@ def vtx_pdg_pivot(direction: str): aggfunc=lambda x: tuple(x.to_list()), ) - pcls_in = vtx_pdg_pivot("in") - pcls_out = vtx_pdg_pivot("out") + pcls_in = vtx_pdg_pivot("src") + pcls_out = vtx_pdg_pivot("dst") # join in and out vertex pdgs into single dataframe vtxs = pcls_out.join( pcls_in, how="outer", lsuffix="_in", rsuffix="_out" @@ -246,9 +246,9 @@ def vertex_descendants(adj: gcl.AdjacencyList, vertex: int) -> gcl.MaskArray: Boolean mask over the graphicle objects associated with the passed AdjacencyList. """ - vertex_mask = adj.edges["in"] == vertex + vertex_mask = adj.edges["src"] == vertex if not np.any(vertex_mask): - return gcl.MaskArray(adj.edges["out"] == vertex) + return gcl.MaskArray(adj.edges["dst"] == vertex) sparse = adj._sparse_signed vertex = sparse.row[vertex_mask][0] bft = breadth_first_tree(adj._sparse_csr, vertex) @@ -308,7 +308,7 @@ def hadron_vertices( Indices of the hadronisation vertices in the generation DAG, returned in no particular order. """ - vtx_arr = np.unique(adj.edges[status.in_range(80, 90)]["in"]) + vtx_arr = np.unique(adj.edges[status.in_range(80, 90)]["src"]) return tuple(map(int, vtx_arr)) @@ -335,7 +335,7 @@ def _hadron_vtx_parton_iter( vertices which have radiation from the hard process incident. 
""" vtxs = hadron_vertices(adj, status) - in_parton_masks = map(np.equal, vtxs, it.repeat(adj.edges["out"])) + in_parton_masks = map(np.equal, vtxs, it.repeat(adj.edges["dst"])) in_parton_masks, in_parton_masks_ = it.tee(in_parton_masks) hard_overlap = map(np.bitwise_and, in_parton_masks_, it.repeat(from_hard)) has_hard_incident = map(np.any, hard_overlap) @@ -432,7 +432,7 @@ def partition_descendants( vtx_desc = vertex_descendants(graph.adj, vtx_id) for name, branch in hier.items(): for _, mask in _leaf_mask_iter(name, branch): - if vtx_id not in graph.edges["out"][mask]: + if vtx_id not in graph.edges["dst"][mask]: continue mask.data = _partition_vertex( mask, @@ -671,7 +671,7 @@ def hard_descendants( raise ValueError(f"Missing PDGs in hard process: {missing_str}.") hard_graph = hard_graph[target_mask] pdg_keys = _pdgs_to_keys(hard_graph.pdg) - pcl_out_vtxs = map(int, hard_graph.edges["out"]) + pcl_out_vtxs = map(int, hard_graph.edges["dst"]) descs = map(vertex_descendants, it.repeat(graph.adj), pcl_out_vtxs) group = gcl.MaskGroup(cl.OrderedDict(zip(pdg_keys, descs)), agg_op="or") for key, idx in zip(pdg_keys, np.flatnonzero(hard_mask)): @@ -832,8 +832,8 @@ def _make_tree( ) else: mask = desc[parton] - in_edge = adj.edges["in"][mask][0] - initial_parton_mask = adj.edges["out"] == in_edge + in_edge = adj.edges["src"][mask][0] + initial_parton_mask = adj.edges["dst"] == in_edge if np.sum(initial_parton_mask) == 1: mask.data[initial_parton_mask] = True branch[parton] = mask diff --git a/graphicle/transform.py b/graphicle/transform.py index 4e30183..1e3277e 100644 --- a/graphicle/transform.py +++ b/graphicle/transform.py @@ -70,8 +70,8 @@ def check_sign(x): # node labels set as particle indices in original edge array for i, node_triplet in enumerate(edges_as_nodes): key = node_triplet["key"] - node = node_triplet[["in", "out"]] - sign = -1 * check_sign(node["in"] * node["out"]) + node = node_triplet[["src", "dst"]] + sign = -1 * check_sign(node["src"] * node["dst"]) node_idxs[i] = sign * (np.where(edges == node)[0][key] + 1) nx_node_graph = _nx.relabel_nodes( nx_node_graph,