
Commit

Merge pull request #14 from azizilab/cam_branch
Cam branch
cameronyoungpark authored Sep 13, 2024
2 parents 7f0feec + ac2754d commit 574aebd
Showing 17 changed files with 11,525 additions and 191 deletions.
3,642 changes: 3,642 additions & 0 deletions cam.ipynb

Large diffs are not rendered by default.

9 changes: 4 additions & 5 deletions diisco/diisco.py
@@ -649,11 +649,10 @@ def get_means(self, timepoints: torch.Tensor) -> Dict[str, torch.Tensor]:
sampling from the posterior. This function is currently only
supported while using a MultivariateNormalFactorized guide.
"""
if self.guide_name != "MultivariateNormalFactorized":
raise ValueError(
"The get_means function is only supported when using the "
"MultivariateNormalFactorized guide."
)
supported_guide = ["MultivariateNormalFactorized"]
if self.guide_name not in supported_guide:
raise ValueError(" Please use a supported guide: {}".format(supported_guide))


n_train_timepoints = self.train_timepoints.shape[0]
n_cell_types = self.n_cell_types
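The hunk above replaces the hardcoded guide check with a list of supported guides, so additional guides can later be enabled by appending to the list. A minimal standalone sketch of that validation pattern (the names SUPPORTED_GUIDES and check_guide are illustrative, not part of diisco):

SUPPORTED_GUIDES = ["MultivariateNormalFactorized"]

def check_guide(guide_name: str) -> None:
    # Raise a descriptive error when get_means cannot be used with the configured guide.
    if guide_name not in SUPPORTED_GUIDES:
        raise ValueError(
            f"get_means is only supported for these guides: {SUPPORTED_GUIDES}, "
            f"got '{guide_name}'."
        )

check_guide("MultivariateNormalFactorized")  # passes
# check_guide("AutoNormal")                  # would raise ValueError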
6 changes: 4 additions & 2 deletions diisco/guides.py
@@ -526,8 +526,10 @@ def _init_params(self, timepoints, proportions, lambda_matrix):
assert init_W.shape == (n_cell_types, n_cell_types, n_timepoints)
assert init_B.shape == (n_cell_types, 1, n_timepoints)

init_W = torch.randn(n_cell_types, n_cell_types, n_timepoints) * 0.1
init_B = torch.randn(n_cell_types, 1, n_timepoints) * 0.1
# TODO: We are not using linear regression to initialize the
# parameters of the guide. We should remove the code here then.
# init_W = torch.randn(n_cell_types, n_cell_types, n_timepoints) * 0.1
# init_B = torch.randn(n_cell_types, 1, n_timepoints) * 0.1

return init_W, init_B

37 changes: 31 additions & 6 deletions evals_pkg/evals/__main__.py
@@ -19,7 +19,7 @@
evaluate_predicted_observations,
ObservationMetrics,
)
from evals.models import DiiscoModel
from evals.models import DiiscoModel, RollingLinearModel

import os
import namesgenerator
@@ -176,15 +176,25 @@ def make_model_config(model_cls: Model, args: argparse.Namespace) -> dict:
assert "y_length_scale" in config
config["y_length_scale"] = args.length_scale

# Add additional modle parameters
assert "y_sigma" in config
config["y_sigma"] = args.noise * 1.5

if model_cls == RollingLinearModel:
assert "epsilon" in config
config["epsilon"] = args.weights_length_scale

# Add additional model parameters
if args.model_parameters is not None:
for i in range(0, len(args.model_parameters), 2):
# Assume that the model parameters are in the form --<parameter-name> <parameter-value>
key = args.model_parameters[i]
value = args.model_parameters[i + 1]
assert key in config, f"Parameter {key} not found in the model configuration"
value_type = type(config[key])
config[key] = value_type(value)
if value_type == bool:
config[key] = value == "True"
else:
config[key] = value_type(value)

return config
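
The override loop above coerces each "--model-parameters <name> <value>" pair to the type already stored in the default config, with a special case for booleans because bool("False") evaluates to True in Python. A self-contained sketch of the same logic (the parse_overrides name is illustrative):

def parse_overrides(config: dict, params: list) -> dict:
    # Apply "--model-parameters key value [key value ...]" overrides in place.
    for i in range(0, len(params), 2):
        key, raw_value = params[i], params[i + 1]
        assert key in config, f"Parameter {key} not found in the model configuration"
        value_type = type(config[key])
        if value_type is bool:
            # bool("False") is True, so compare against the literal string instead.
            config[key] = raw_value == "True"
        else:
            config[key] = value_type(raw_value)
    return config

print(parse_overrides({"lr": 0.01, "verbose": False}, ["lr", "0.1", "verbose", "True"]))
# {'lr': 0.1, 'verbose': True}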

@@ -221,20 +231,20 @@ def main():
model = model_cls(**config)

model.fit(
t=dataset.timepoints, y=dataset.observations, is_active=dataset.model_prior
t=dataset.timepoints, y=dataset.standardized_observations, is_active=dataset.model_prior
)

interaction_score = model.predict_obs_interactions()
interaction_metrics: InteractionMetrics = evaluate_predicted_interactions(
true_interactions=dataset.true_interactions,
interaction_score=interaction_score,
observations=dataset.observations,
observations=dataset.standardized_observations,
timepoints=dataset.timepoints,
)
# print("\n\ninteraction_score", interaction_score.astype(int)[0])
# print("\n\ntrue_interactions", dataset.true_interactions.astype(int)[0])

true_observations = dataset.observations
true_observations = dataset.standardized_observations
predicted_observations = model.predict_y_train()

# print("\n\ntrue_observations", true_observations)
@@ -254,6 +264,7 @@ def main():
mse=observation_metrics.mse,
rmse=observation_metrics.rmse,
mae=observation_metrics.mae,

# Metrics pertaining the interactions
stds=interaction_metrics.stds,
accuracies=interaction_metrics.accuracies,
@@ -264,6 +275,14 @@
prc_auc=interaction_metrics.prc_auc,
symmetrical_auc=interaction_metrics.symmetrical_auc,
symmetrical_prc_auc=interaction_metrics.symmetrical_prc_auc,
true_interactions=interaction_metrics.true_interactions,
transformed_interactions=interaction_metrics.transformed_interactions,
symmetrical_transformed_interactions=interaction_metrics.symmetrical_transformed_interactions,

# Predicted
predicted_interactions=interaction_score.tolist(),
predicted_observations=predicted_observations.tolist(),

# Metrics pertaining the dataset
n_cells=dataset.observations.shape[1],
n_timepoints=dataset.observations.shape[0],
@@ -275,6 +294,12 @@
flip_prob_inactive=args.flip_prob_inactive,
threshold_for_active=args.threshold,
seed=args.seed,

# Dataset stuff
weights=dataset.weights.tolist(),
standardized_observations=dataset.standardized_observations.tolist(),
observations=dataset.observations.tolist(),
timepoints=dataset.timepoints.tolist(),
# Stuff pertaining the model itself
model_name=args.model,
config=config,
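The new fields logged above (predicted_interactions, predicted_observations, weights, standardized_observations, observations, timepoints) are all converted with .tolist() before logging. Assuming the run results are written out as JSON (the serialization itself is not shown in this diff), the conversion is what makes the numpy arrays serializable:

import json
import numpy as np

predicted = np.eye(2)
# json.dumps({"predicted_interactions": predicted})  # TypeError: ndarray is not JSON serializable
print(json.dumps({"predicted_interactions": predicted.tolist()}))
# {"predicted_interactions": [[1.0, 0.0], [0.0, 1.0]]}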
5 changes: 4 additions & 1 deletion evals_pkg/evals/discretization.py
@@ -65,6 +65,9 @@ def discretize(
"""
# Just for safety
transformed_interactions = np.abs(transformed_interactions)
# transformed_interactions = np.maximum(
# transformed_interactions, transformed_interactions.transpose(0, 2, 1)
# )
flat_cell_interactions = transformed_interactions.flatten()
if self.count_zeros:
cell_std = flat_cell_interactions.std()
@@ -76,7 +79,7 @@
discretized = (
transformed_interactions > self.std_deviations * cell_std
).astype(int)
return np.maximum(discretized, discretized.transpose(0, 2, 1))
return discretized

@abstractmethod
def transform_interactions(
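With this change discretize returns the thresholded matrix directly instead of symmetrizing it; the commented-out np.maximum call shows the behaviour that was removed. A compact sketch of the thresholding step with the symmetrization kept behind a flag (discretize_interactions is an illustrative name, and the count_zeros branch of the real method is ignored here):

import numpy as np

def discretize_interactions(transformed: np.ndarray, std_deviations: float,
                            symmetrize: bool = False) -> np.ndarray:
    # Mark an interaction as active when |score| exceeds std_deviations * global std.
    transformed = np.abs(transformed)
    cell_std = transformed.flatten().std()
    discretized = (transformed > std_deviations * cell_std).astype(int)
    if symmetrize:
        # The behaviour removed in this commit: active if active in either direction.
        discretized = np.maximum(discretized, discretized.transpose(0, 2, 1))
    return discretized

scores = np.random.randn(5, 3, 3)  # (n_timepoints, n_cells, n_cells)
print(discretize_interactions(scores, 1.0, symmetrize=False).shape)  # (5, 3, 3)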
81 changes: 54 additions & 27 deletions evals_pkg/evals/generate_data.py
@@ -39,6 +39,7 @@ def generate_data(
flip_prob_active: float = 0.1,
flip_prob_inactive: float = 0.1,
threshold_for_active: float = 0.1,
extra_cells = 0
) -> SampledData:
"""
Generates data for DIISCO
@@ -60,7 +61,9 @@
The number of dependent variables per block
n_timepoints : int
The number of total timepoints
noise : float
noise_y : float
The noise in the data
noise_w : float
The noise in the data
length_scale : float
The length scale of the gaussian process
@@ -74,6 +77,9 @@
threshold_for_active : float
How big should the absolute value of the weight be for the edge to be considered active.
extra_cells : int
The number of extra cells that do not vary with time
Returns
------------------------
SampledData :
@@ -83,29 +89,30 @@
np.random.seed(seed)

block_size = n_independent_per_block + n_dependent_per_block
total_cells = n_blocks * block_size
total_cells = n_blocks * block_size + extra_cells

weights = np.zeros((n_timepoints, total_cells, total_cells))
# m[t, i, j] = 1 if i is computed from j at time t
observations = np.zeros((n_timepoints, total_cells))

timepoints = np.linspace(0, 1, n_timepoints)

timepoints = np.random.uniform(0, 1, n_timepoints)
# Fill in the weights matrix
for i in range(total_cells):
for j in range(total_cells):
for i in range(total_cells-extra_cells):
for j in range(total_cells-extra_cells):
weights[:, i, j] = _sample_from_gp(
timepoints, weights_length_scale, noise
timepoints, weights_length_scale, noise#, LOC = 0, _scale = W_SIGMA
).flatten()

# Let's pretend they are all independent variables at the start
for cell in range(total_cells):
for cell in range(total_cells-extra_cells):
observations[:, cell] = _sample_from_gp(
timepoints, length_scale, noise
timepoints, length_scale, noise#, LOC = 0, _scale= Y_SIGMA
).flatten()

# Now we replace the dependent variables with actual values
for block in range(n_blocks):

independent_block = np.arange(
block * block_size, block * block_size + n_independent_per_block
)
@@ -114,12 +121,12 @@
block * block_size + block_size,
)
other_blocks = np.setdiff1d(
np.arange(total_cells), np.concatenate([independent_block, dependent_block])
np.arange(total_cells-extra_cells), np.concatenate([independent_block, dependent_block])
)

for cell in independent_block:
observations[:, cell] = _sample_from_gp(
timepoints, length_scale, noise
timepoints, length_scale, noise#, LOC = 0, _scale= Y_SIGMA
).flatten()
weights[:, cell] = 0
weights[:, cell, cell] = 1
@@ -133,6 +140,12 @@
np.random.randint(0, n_independent_per_block)
] = True


# observations[:, cell] = _sample_from_gp(
# timepoints, length_scale*10, noise
# ).flatten()

# else:
for timepoint in range(n_timepoints):
weights[timepoint, cell, independent_block] = weights[
timepoint, cell, independent_block
Expand All @@ -143,19 +156,28 @@ def generate_data(
weights[timepoint, cell, independent_block],
observations[timepoint, independent_block],
)
observations[timepoint, cell] = w @ obs + np.random.normal(0, noise)

observations[timepoint, cell] = w @ obs + np.random.normal(0, noise*10)
# Standardized observed
standardized_observations = (observations - np.mean(observations, axis=0)) / np.std(
observations, axis=0
)

for extra_cell in range(total_cells-extra_cells, total_cells):
print(extra_cell)
# new_scale = 0.00001
observations[:, extra_cell] = np.random.normal(scale = 0.01, size=(n_timepoints, 1)).flatten()
standardized_observations[:, extra_cell] = observations[:, extra_cell]
weights[:, extra_cell] = 0
weights[:, extra_cell, extra_cell] = 1

true_interactions = create_true_interactions_from_dependent_computations(
weights, threshold_for_active
)
model_prior = create_model_prior_from_true_interactions(
true_interactions, flip_prob_active, flip_prob_inactive
)

assert weights.shape == (n_timepoints, total_cells, total_cells)
assert observations.shape == (n_timepoints, total_cells)
assert timepoints.shape == (n_timepoints,)
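
The block above standardizes each cell's observations column-wise and then overwrites the last extra_cells columns with near-constant noise and an identity self-weight, so those cells carry no temporal signal. The two steps in isolation (a sketch with made-up sizes; variable names mirror the diff):

import numpy as np

n_timepoints, n_varying, extra_cells = 10, 4, 2
total_cells = n_varying + extra_cells
observations = np.random.randn(n_timepoints, total_cells)
weights = np.zeros((n_timepoints, total_cells, total_cells))

# Column-wise standardization: zero mean, unit variance per cell type.
standardized_observations = (observations - observations.mean(axis=0)) / observations.std(axis=0)

# Extra, time-invariant cells: tiny Gaussian noise and a self-loop weight of 1.
for extra_cell in range(total_cells - extra_cells, total_cells):
    observations[:, extra_cell] = np.random.normal(scale=0.01, size=n_timepoints)
    standardized_observations[:, extra_cell] = observations[:, extra_cell]
    weights[:, extra_cell] = 0
    weights[:, extra_cell, extra_cell] = 1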
@@ -243,19 +265,19 @@ def create_true_interactions_from_dependent_computations(

is_active_matrix = np.abs(weights) > threshold
# Ensure symmetry
adjacency_matrix = np.maximum(is_active_matrix, is_active_matrix.transpose(0, 2, 1))

for t in range(n_timepoints):
# Make sure diagonal is 1 for computation
adjacency_matrix[t] = np.maximum(adjacency_matrix[t], np.eye(total_cells))
reachability_matrix[t] = compute_reachability_matrix(adjacency_matrix[t])
# But also ensure it is 0 for the final result
reachability_matrix[t] = np.minimum(
reachability_matrix[t], 1 - np.eye(total_cells)
)
# adjacency_matrix = np.maximum(is_active_matrix, is_active_matrix.transpose(0, 2, 1))

# for t in range(n_timepoints):
# # Make sure diagonal is 1 for computation
# adjacency_matrix[t] = np.maximum(adjacency_matrix[t], np.eye(total_cells))
# reachability_matrix[t] = compute_reachability_matrix(adjacency_matrix[t])
# # But also ensure it is 0 for the final result
# reachability_matrix[t] = np.minimum(
# reachability_matrix[t], 1 - np.eye(total_cells)
# )

# set the diagonal to 0
true_interactions = reachability_matrix
true_interactions = is_active_matrix  # test without the symmetry
original_diagonals = is_active_matrix[
:, np.arange(total_cells), np.arange(total_cells)
].astype(int)
@@ -291,7 +313,9 @@ def compute_reachability_matrix(


def _sample_from_gp(
x: Float[np.ndarray, "n_examples 1"], lengthscale: float, noise: float
x: Float[np.ndarray, "n_examples 1"], lengthscale: float, noise: float,
# _scale: float,
# LOC: float
) -> Float[np.ndarray, "n_examples 1"]:
"""
Sample from a Gaussian Process with a standard Gaussian (RBF) kernel.
@@ -336,6 +360,9 @@ def rbf_kernel(

# Sample from the multivariate normal distribution
L = np.linalg.cholesky(K + 1e-6 * np.eye(n_examples))
sample = L @ np.random.normal(size=(n_examples, 1))
sample = L @ np.random.normal(
# loc = 0,
# scale = _scale,
size=(n_examples, 1))

return sample
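
_sample_from_gp draws one function from a zero-mean GP prior: build the kernel matrix over the timepoints, add a small jitter, take a Cholesky factor, and multiply it by standard normal draws. A self-contained version of that construction (the exact kernel and noise handling in the repo are only partially visible in this hunk, so treat the RBF form below as the usual exp(-d^2 / 2l^2) default rather than the repository's definition):

import numpy as np

def sample_from_gp(x: np.ndarray, lengthscale: float, noise: float) -> np.ndarray:
    # Sample a path from a zero-mean GP with an RBF kernel evaluated at points x.
    x = x.reshape(-1, 1)
    sq_dists = (x - x.T) ** 2
    K = np.exp(-0.5 * sq_dists / lengthscale**2) + noise**2 * np.eye(len(x))
    L = np.linalg.cholesky(K + 1e-6 * np.eye(len(x)))  # jitter keeps K positive definite
    return L @ np.random.normal(size=(len(x), 1))

sample = sample_from_gp(np.linspace(0, 1, 20), lengthscale=0.2, noise=0.1)
print(sample.shape)  # (20, 1)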
14 changes: 12 additions & 2 deletions evals_pkg/evals/metrics.py
@@ -22,9 +22,9 @@
import numpy as np


MIN_STDS = 0.1
MIN_STDS = 0.0
MAX_STDS = 5
N_STDS = 10
N_STDS = int((MAX_STDS - MIN_STDS) * 2 + 1) # (0, 0.5, 1, 1.5, ..., 5)


@dataclass
@@ -47,6 +47,12 @@ class InteractionMetrics:
symmetrical_prc_auc: float # same as prc_auc but we symmetrize the interactions
# The index of the best std according to the AUC
best_std: int
# True, transformed and non-transformed interactions
# are not flattened
true_interactions: list[int]
transformed_interactions: list[int]
symmetrical_transformed_interactions: list[int]



@dataclass
@@ -149,9 +155,13 @@ def evaluate_predicted_interactions(
best_std=best_std,
symmetrical_auc=symmetrical_auc,
symmetrical_prc_auc=symmetrical_prc_auc,
true_interactions=true_interactions.flatten().tolist(),
transformed_interactions=transformed_interactions.flatten().tolist(),
symmetrical_transformed_interactions= symmetrical_transformed_interactions.flatten().tolist()
)



def evaluate_predicted_observations(
true_observations: Float[ndarray, " n_timepoints n_cells"],
predicted_observations: Float[ndarray, " n_timepoints n_cells"],
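With the constants at the top of the metrics.py diff, MIN_STDS = 0.0, MAX_STDS = 5 and N_STDS = int((5 - 0) * 2 + 1) = 11, so the sweep over discretization thresholds now runs from 0 to 5 standard deviations in half-unit steps instead of the previous 10-point grid starting at 0.1. Assuming the grid is built with np.linspace (the construction itself is outside this hunk):

import numpy as np

MIN_STDS, MAX_STDS = 0.0, 5
N_STDS = int((MAX_STDS - MIN_STDS) * 2 + 1)  # 11 thresholds
print(np.linspace(MIN_STDS, MAX_STDS, N_STDS))
# [0.  0.5 1.  1.5 2.  2.5 3.  3.5 4.  4.5 5. ]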