Skip to content

Commit

Permalink
vectorize attributes and edges
Browse files Browse the repository at this point in the history
  • Loading branch information
LegrandNico committed Oct 25, 2023
1 parent 5b9e102 commit 6299587
Show file tree
Hide file tree
Showing 10 changed files with 296 additions and 229 deletions.
81 changes: 74 additions & 7 deletions src/pyhgf/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ def __init__(
self.update_type = update_type
self.verbose = verbose
self.n_levels = n_levels
self.n_nodes: int = 0
self.edges: Edges = ()
self.node_trajectories: Dict
self.attributes: Dict = {}
Expand Down Expand Up @@ -243,12 +244,19 @@ def init(self) -> "HGF":
sequence and before providing any input data.
"""

# create the update sequence automatically if not provided
if self.update_sequence is None:
self.set_update_sequence()
if self.verbose:
print("... Create the update sequence from the network structure.")

# convert the adjacency lists to adjacency matrices
self = self.adjacency_list_to_matrix()

# vectorize the attributes
self = self.attributes_to_matrix()

# create the belief propagation function that will be scanned
if self.scan_fn is None:
self.scan_fn = Partial(
Expand Down Expand Up @@ -554,7 +562,7 @@ def add_input_node(
raise Exception(
f"A node with index {input_idx} already exists in the HGF network."
)

self.n_nodes += 1
if input_idx == 0:
# this is the first node, create the node structure

Expand Down Expand Up @@ -638,10 +646,9 @@ def add_value_parent(
tonic_drift :
The drift of the random walk. Defaults to `0.0` (no drift).
autoregressive_coefficient :
The autoregressive coefficient is only used to parametrize the AR1 process
and represents the autoregressive coefficient. If
:math:`-1 \\le \\phi \\le 1`, the process is stationary and will revert to
the autoregressive intercept.
The autoregressive coefficient is only used to parametrize the AR1 process.
If :math:`-1 \\le \\phi \\le 1`, the process is stationary and will revert
to the autoregressive intercept.
autoregressive_intercept :
The parameter `m` is only used to parametrize the AR1 process and represents
the autoregressive intercept. If :math:`-1 \\le \\phi \\le 1`, this is the
Expand All @@ -654,8 +661,7 @@ def add_value_parent(
children_idxs = [children_idxs]

# how many nodes in structure
n_nodes = len(self.edges)
parent_idx = n_nodes # append a new parent in the structure
parent_idx = self.n_nodes # append a new parent in the structure

if parent_idx in self.attributes.keys():
raise Exception(
Expand Down Expand Up @@ -721,6 +727,8 @@ def add_value_parent(
# convert the list back to a tuple
self.edges = tuple(edges_as_list)

self.n_nodes += 1

return self

def add_volatility_parent(
Expand Down Expand Up @@ -846,6 +854,8 @@ def add_volatility_parent(
# convert the list back to a tuple
self.edges = tuple(edges_as_list)

self.n_nodes += 1

return self

def set_update_sequence(self) -> "HGF":
Expand All @@ -863,3 +873,60 @@ def set_update_sequence(self) -> "HGF":
self.update_sequence = tuple(prediction_sequence)

return self

def adjacency_list_to_matrix(self) -> "HGF":
    """Convert the edges from adjacency-list to adjacency-matrix form.

    Each relation kind (value/volatility x parents/children) becomes an
    ``(n_nodes, n_nodes)`` 0/1 matrix in which entry ``[i, j] == 1``
    indicates that node ``j`` stands in that relation to node ``i``.

    Returns
    -------
    self :
        The model instance with :py:attr:`edges` replaced by a dictionary
        of adjacency matrices.

    """
    n_nodes = len(self.edges)
    relation_kinds = (
        "value_parents",
        "value_children",
        "volatility_parents",
        "volatility_children",
    )
    matrix_edges = {kind: np.zeros((n_nodes, n_nodes)) for kind in relation_kinds}

    # use getattr instead of eval for attribute access: safer, faster,
    # and evaluated only once per (node, relation) pair
    for i, node_edges in enumerate(self.edges):
        for kind in relation_kinds:
            indexes = getattr(node_edges, kind)
            if indexes is not None:
                matrix_edges[kind][i, indexes] = 1

    self.edges = matrix_edges

    return self

def attributes_to_matrix(self) -> "HGF":
    """Vectorize the attributes of a probabilistic network.

    Gather the per-node attribute dictionaries into a single dictionary of
    arrays: one entry per attribute name, each holding one value per node
    (``0`` for nodes that lack the attribute). The four coupling attributes
    are stored as ``(n_nodes, n_nodes)`` matrices of ones.

    Returns
    -------
    self :
        The model instance with :py:attr:`attributes` replaced by the
        vectorized representation.

    """
    n_nodes = len(self.attributes)

    # union of attribute names across all nodes, keeping first-seen order
    attribute_names: list = []
    for node_idx in range(n_nodes):
        for name in self.attributes[node_idx]:
            if name not in attribute_names:
                attribute_names.append(name)

    # one vector per attribute, defaulting to 0 for missing entries
    vectorized = {name: [0] * n_nodes for name in attribute_names}
    for node_idx in range(n_nodes):
        for name, value in self.attributes[node_idx].items():
            vectorized[name][node_idx] = value

    # coupling strengths are special-cased as full node-by-node matrices
    for name in (
        "value_coupling_parents",
        "volatility_coupling_children",
        "volatility_coupling_parents",
        "value_coupling_children",
    ):
        vectorized[name] = np.ones((n_nodes, n_nodes))

    # store everything as numpy arrays
    self.attributes = {
        name: np.asarray(values) for name, values in vectorized.items()
    }

    return self
34 changes: 15 additions & 19 deletions src/pyhgf/networks.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from pyhgf.model import HGF


@partial(jit, static_argnames=("update_sequence", "edges", "input_nodes_idx"))
@partial(jit, static_argnames=("update_sequence", "input_nodes_idx"))
def beliefs_propagation(
attributes: Dict,
data: ArrayLike,
Expand Down Expand Up @@ -442,11 +442,11 @@ def to_pandas(hgf: "HGF") -> pd.DataFrame:
# get time and time steps from the first input node
structure_df = pd.DataFrame(
{
"time_steps": hgf.node_trajectories[hgf.input_nodes_idx.idx[0]][
"time_step"
"time_steps": hgf.node_trajectories["time_step"][
:, hgf.input_nodes_idx.idx[0]
],
"time": jnp.cumsum(
hgf.node_trajectories[hgf.input_nodes_idx.idx[0]]["time_step"]
hgf.node_trajectories["time_step"][:, hgf.input_nodes_idx.idx[0]]
),
}
)
Expand All @@ -467,20 +467,16 @@ def to_pandas(hgf: "HGF") -> pd.DataFrame:
)
pd.concat([structure_df, df], axis=1)
else:
structure_df[f"observation_input_{idx}"] = hgf.node_trajectories[idx][
"value"
structure_df[f"observation_input_{idx}"] = hgf.node_trajectories["value"][
:, idx
]

# loop over non input nodes and store sufficient statistics with surprise
indexes = [
i
for i in range(1, len(hgf.node_trajectories))
if i not in hgf.input_nodes_idx.idx
]
indexes = [i for i in range(1, hgf.n_nodes) if i not in hgf.input_nodes_idx.idx]
df = pd.DataFrame(
dict(
[
(f"x_{i}_{var}", hgf.node_trajectories[i][var])
(f"x_{i}_{var}", hgf.node_trajectories[var][:, i])
for i in indexes
for var in ["mean", "precision", "expected_mean", "expected_precision"]
]
Expand All @@ -498,20 +494,20 @@ def to_pandas(hgf: "HGF") -> pd.DataFrame:
for i in indexes:
if i in continuous_parents_idxs:
surprise = gaussian_surprise(
x=hgf.node_trajectories[0]["value"],
expected_mean=hgf.node_trajectories[i]["expected_mean"],
expected_precision=hgf.node_trajectories[i]["expected_precision"],
x=hgf.node_trajectories["value"][:, 0],
expected_mean=hgf.node_trajectories["expected_mean"][:, i],
expected_precision=hgf.node_trajectories["expected_precision"][:, i],
)
else:
surprise = gaussian_surprise(
x=hgf.node_trajectories[i]["mean"],
expected_mean=hgf.node_trajectories[i]["expected_mean"],
expected_precision=hgf.node_trajectories[i]["expected_precision"],
x=hgf.node_trajectories["mean"][:, i],
expected_mean=hgf.node_trajectories["expected_mean"][:, i],
expected_precision=hgf.node_trajectories["expected_precision"][:, i],
)

# fill with nans when the model cannot fit
surprise = jnp.where(
jnp.isnan(hgf.node_trajectories[i]["mean"]), jnp.nan, surprise
jnp.isnan(hgf.node_trajectories["mean"][:, i]), jnp.nan, surprise
)
structure_df[f"x_{i}_surprise"] = surprise

Expand Down
21 changes: 8 additions & 13 deletions src/pyhgf/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,10 @@ def plot_trajectories(
"""
trajectories_df = hgf.to_pandas()
n_nodes = len(hgf.edges)
palette = itertools.cycle(sns.color_palette())

if axs is None:
_, axs = plt.subplots(nrows=n_nodes + 1, figsize=figsize, sharex=True)
_, axs = plt.subplots(nrows=hgf.n_nodes + 1, figsize=figsize, sharex=True)

# plot the input node(s)
# ----------------------
Expand All @@ -141,8 +140,8 @@ def plot_trajectories(

# plot continuous and binary nodes
# --------------------------------
ax_i = n_nodes - len(hgf.input_nodes_idx.idx) - 1
for node_idx in range(0, n_nodes):
ax_i = hgf.n_nodes - len(hgf.input_nodes_idx.idx) - 1
for node_idx in range(hgf.n_nodes):
if node_idx not in hgf.input_nodes_idx.idx:
# use different colors for each node
color = next(palette)
Expand All @@ -160,7 +159,7 @@ def plot_trajectories(

# plot the global surprise of the model
# -------------------------------------
surprise_ax = axs[n_nodes].twinx()
surprise_ax = axs[hgf.n_nodes].twinx()
surprise_ax.fill_between(
x=trajectories_df.time,
y1=trajectories_df.surprise,
Expand Down Expand Up @@ -458,25 +457,21 @@ def plot_nodes(

# if this is the value parent of an input node
# the CI should be treated differently
if hgf.edges[node_idx].value_children is not None:
if np.any(hgf.edges["value_children"][node_idx]):
if np.any(
[
(
i # type : ignore
in hgf.edges[ # type: ignore
node_idx # type: ignore
].value_children # type: ignore
in np.where(hgf.edges["value_children"][node_idx])[0]
)
and kind == "binary"
for i, kind in enumerate(hgf.input_nodes_idx.kind)
]
):
# get parent node
parent_idx = hgf.edges[ # type: ignore
node_idx # type: ignore
].value_parents[
parent_idx = np.where(hgf.edges["value_parents"][node_idx])[0][
0
] # type: ignore
]

# compute mu +/- sd at time t-1
# and use the sigmoid transform before plotting
Expand Down
Loading

0 comments on commit 6299587

Please sign in to comment.