feat(models): add new func compute to run the model on a manual set of params


mwong009 · Sep 14, 2023 · 5d1f8de · 5d1f8de
1 parent f5461cb
commit 5d1f8de
Show file tree

Hide file tree

Showing 2 changed files with 95 additions and 2 deletions.
diff --git a/pycmtensor/models/basic.py b/pycmtensor/models/basic.py
@@ -231,6 +231,58 @@ def drop_unused_variables(cost, params, variables):
         return [var for var in list(variables) if var not in symbols]
 
 
+def compute(model, ds, **params):
+    """Evaluate the model with manually specified parameter values
+
+    The given values are applied temporarily: the original values are put back
+    before returning, even when the evaluation raises.
+
+    Args:
+        model (pycmtensor.models.BaseModel): model to evaluate
+        ds (pycmtensor.dataset.Dataset): dataset providing the training and
+            validation splits
+        **params (dict): keyword arguments for model coefficients (`Params`);
+            names that match no entry of `model.params` are ignored
+
+    Returns:
+        dict: model likelihood and error for training and validation datasets
+
+    Example:
+    ```
+    compute(model, ds, b_time=-5.009, b_purpose=0.307, asc_pt=-1.398, asc_drive=4.178,
+            asc_cycle=-3.996, b_car_own=1.4034)
+    ```
+    """
+    # remember the original value of every parameter that is overridden
+    saved = [(p, p.get_value()) for p in model.params if p.name in params]
+    try:
+        for p, _ in saved:
+            p.set_value(params[p.name])
+
+        # compute all the outputs of the training and validation datasets
+        x_y = model.x + [model.y]
+        train_data = ds.train_dataset(x_y)
+        valid_data = ds.valid_dataset(x_y)
+
+        t_index = np.arange(len(train_data[-1]))
+        v_index = np.arange(len(valid_data[-1]))
+
+        t_log_likelihood = model.log_likelihood_fn(*train_data, t_index)
+        t_error = model.prediction_error_fn(*train_data)
+        v_log_likelihood = model.log_likelihood_fn(*valid_data, v_index)
+        v_error = model.prediction_error_fn(*valid_data)
+        return {
+            "train log likelihood": t_log_likelihood,
+            "train error": t_error,
+            "validation log likelihood": v_log_likelihood,
+            "validation error": v_error,
+        }
+    finally:
+        # put back original values, also when one of the calls above raised
+        for p, old_value in saved:
+            p.set_value(old_value)
+
+
 def train(model, ds, **kwargs):
     """main training loop
 
@@ -344,10 +396,9 @@ def train(model, ds, **kwargs):
 
                 gnorm = np.sqrt(np.sum(np.square(diff)))
 
-                bl = model.results.best_loglikelihood
                 if (
                     (gnorm < (gnorm_min / 5.0))
-                    or (log_likelihood > (0.95 * bl))
+                    or (log_likelihood > (0.95 * model.results.best_loglikelihood))
                     or ((epoch % (max_epochs // 10)) == 0)
                 ):
                     error = model.prediction_error_fn(*valid_data)

diff --git a/tests/test_compute.py b/tests/test_compute.py
@@ -0,0 +1,42 @@
+import numpy as np
+import pandas as pd
+import pytest
+
+from pycmtensor.dataset import Dataset
+from pycmtensor.expressions import Beta
+from pycmtensor.models import MNL, compute
+
+
+@pytest.fixture
+def lpmc_ds():
+    """LPMC records of travel year 2015, wrapped in a Dataset after `split(0.8)`."""
+    trips = pd.read_csv("data/lpmc.dat", sep="\t")
+    dataset = Dataset(df=trips[trips["travel_year"] == 2015], choice="travel_mode")
+    dataset.split(0.8)
+    return dataset
+
+
+def test_compute(lpmc_ds):
+    """compute() must return all four metrics and restore the model parameters."""
+    ds = lpmc_ds
+    asc_walk = Beta("asc_walk", 0.0, None, None, 1)
+    asc_cycle = Beta("asc_cycle", 0.0, None, None, 0)
+    asc_pt = Beta("asc_pt", 0.0, None, None, 0)
+    asc_drive = Beta("asc_drive", 0.0, None, None, 0)
+    b_time = Beta("b_time", 0.0, None, None, 0)
+
+    U_walk = asc_walk
+    U_cycle = asc_cycle
+    U_pt = asc_pt
+    U_drive = asc_drive + b_time * ds["dur_driving"]
+
+    U = [U_walk, U_cycle, U_pt, U_drive]
+    mymodel = MNL(ds, locals(), U)
+    # snapshot the parameter values so the restore can be checked after the call
+    before = {p.name: p.get_value() for p in mymodel.params}
+    overrides = dict(b_time=-5.009, asc_pt=-1.398, asc_drive=-4.178, asc_cycle=-3.996)
+    results = compute(mymodel, ds, **overrides)
+
+    assert "train log likelihood" in results and "validation log likelihood" in results
+    assert "train error" in results and "validation error" in results
+    assert all(np.allclose(p.get_value(), before[p.name]) for p in mymodel.params)