Skip to content

Commit

Permalink
Merge pull request #81 from mwong009/dev
Browse files Browse the repository at this point in the history
add learning rate lower bounds for decaying functions
  • Loading branch information
mwong009 authored Aug 10, 2023
2 parents 791222e + 11dcd92 commit e6431a6
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 92 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
build-install-test:
strategy:
matrix:
os: ["windows-latest", "ubuntu-latest", "macos-latest"]
os: ["windows-latest", "ubuntu-latest"] # , "macos-latest"]
python-version: ["3.11"]
runs-on: ${{ matrix.os }}
steps:
Expand Down Expand Up @@ -94,7 +94,7 @@ jobs:
- name: Upload coverage to codecov.io
if: |
(matrix.os == 'ubuntu-latest') &&
(matrix.os == 'windows-latest') &&
(matrix.python-version == '3.11')
uses: codecov/codecov-action@v2
with:
Expand Down
6 changes: 3 additions & 3 deletions pycmtensor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
)
config.add(
"validation_threshold",
1.003,
1.005,
"The factor of the validation error score to meet in order to register an improvement",
)
config.add("base_learning_rate", 0.01, "The initial learning rate of the model update")
Expand Down Expand Up @@ -66,8 +66,8 @@
"Learning rate scheduler to use for model estimation",
)
config.add("lr_ExpRangeCLR_gamma", 0.5, "Gamma parameter for `ExpRangeCLR`")
config.add("lr_stepLR_factor", 0.5, "Drop step multiplier factor for `stepLR`")
config.add("lr_stepLR_drop_every", 10, "Drop learning rate every n steps for `stepLR`")
config.add("lr_stepLR_factor", 0.95, "Drop step multiplier factor for `stepLR`")
config.add("lr_stepLR_drop_every", 20, "Drop learning rate every n steps for `stepLR`")
config.add("lr_CLR_cycle_steps", 16, "Steps per cycle for `CyclicLR`")
config.add("lr_PolynomialLR_power", 0.999, "Power factor for `PolynomialLR`")
config.add(
Expand Down
24 changes: 5 additions & 19 deletions pycmtensor/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,34 +52,20 @@ def parse(self, expression):
"-",
"+",
"/",
":",
"AdvancedSubtensor",
"Reshape",
"Abs",
"join",
"sum",
"dtype",
"ARange",
":",
"int64",
"axis",
"Softmax",
"None",
"log",
"Assert",
"i0",
"i1",
"i2",
"i3",
"AND",
"OR",
"EQ",
"not",
"Shape",
"Switch",
"BroadcastTo",
"Composite",
"Could",
"ScalarFromTensor",
"Abs",
"Softmax",
"Switch",
"dtype",
]:
stdout = str.replace(stdout, s, " ")
symbols = [s for s in str.split(stdout, " ") if len(s) > 0]
Expand Down
4 changes: 2 additions & 2 deletions pycmtensor/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ def second_order_derivative(cost, params):
for p in params:
if isinstance(p, Beta):
param = p()
if hasattr(p, "output"):
if "output" in dir(p):
param.name = p.name
wrt_params.append(param)

Expand Down Expand Up @@ -377,7 +377,7 @@ def first_order_derivative(cost, params):
for p in params:
if isinstance(p, Beta):
param = p()
if hasattr(p, "output"):
if "output" in dir(p):
param.name = p.name
wrt_params.append(param)

Expand Down
62 changes: 29 additions & 33 deletions pycmtensor/models/MNL.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import aesara
import aesara.tensor as aet
from aesara import pprint

from pycmtensor.expressions import Beta
from pycmtensor.functions import (
Expand Down Expand Up @@ -64,57 +63,36 @@ def __init__(self, ds, params, utility, av=None, **kwargs):
self.params = extract_params(self.cost, params)
self.betas = [p for p in self.params if isinstance(p, Beta)]

# drop unused variables form dataset
# drop unused variables from dataset
drop_unused = drop_unused_variables(self.cost, self.params, ds())
ds.drop(drop_unused)

self.x = ds.x
self.xy = self.x + [self.y]
info(f"choice: {self.y}")
info(f"inputs in {self.name}: {self.x}")

start_time = perf_counter()
self.build_fn()
self.build_cost_fn()
build_time = round(perf_counter() - start_time, 3)

self.results.build_time = time_format(build_time)
info(f"Build time = {self.results.build_time}")

def build_cost_updates_fn(self, updates):
    """Build (or rebuild) the cost function with updates to the model.

    Creates the class function ``self.cost_updates_fn(*inputs, output, lr,
    index)``, an Aesara function that receives the input variable arrays,
    the output array, a learning rate, and a batch index, returns the cost,
    and applies the parameter updates as a side effect.

    Args:
        updates (List[Tuple[TensorSharedVariable, TensorVariable]]): the list
            of tuples pairing each target shared variable with the new value
            of the variable.
    """
    self.cost_updates_fn = aesara.function(
        name="cost_updates",
        inputs=self.x + [self.y, self.learning_rate, self.index],
        outputs=self.cost,
        updates=updates,
    )

def build_fn(self):
"""Method to call to build mathematical operations without updates to the model"""

def build_cost_fn(self):
    """Construct Aesara functions for cost and prediction errors.

    Builds four callable attributes on the model:

    - ``log_likelihood_fn``: log likelihood over the given inputs/outputs.
    - ``choice_probabilities_fn``: probability of each alternative, with
      axes swapped so observations come first.
    - ``choice_predictions_fn``: the predicted (most likely) alternative.
    - ``prediction_error_fn``: error rate of predictions against ``self.y``.
    """
    self.log_likelihood_fn = aesara.function(
        name="log_likelihood", inputs=self.x + [self.y, self.index], outputs=self.ll
    )

    self.choice_probabilities_fn = aesara.function(
        name="choice_probabilities",
        inputs=self.x,
        # swap axes so the output is indexed (observation, alternative)
        outputs=self.p_y_given_x.swapaxes(0, 1),
    )

    self.choice_predictions_fn = aesara.function(
        name="choice_predictions", inputs=self.x, outputs=self.pred
    )

    self.prediction_error_fn = aesara.function(
        name="prediction_error",
        inputs=self.x + [self.y],
        outputs=errors(self.p_y_given_x, self.y),
    )

def build_gh_fn(self):
"""method to construct aesara functions for hessians and gradient vectors"""
self.hessian_fn = aesara.function(
name="hessian",
inputs=self.x + [self.y, self.index],
Expand All @@ -129,8 +107,26 @@ def build_fn(self):
allow_input_downcast=True,
)

def __str__(self):
    """Return the model's name as its string representation."""
    return str(self.name)
def build_cost_updates_fn(self, updates):
    """Build (or rebuild) the cost function with updates to the model.

    Delegates to ``BaseModel.build_cost_updates_fn``, which creates the class
    function ``self.cost_updates_fn(*inputs, output, lr)`` receiving a list
    of input variable arrays, the output array, and a learning rate.

    Args:
        updates (List[Tuple[TensorSharedVariable, TensorVariable]]): the list
            of tuples pairing each target shared variable with the new value
            of the variable.
    """
    BaseModel.build_cost_updates_fn(self, updates)

def predict(self, ds, return_probabilities=False):
    """Predict the output of the most likely alternative given the
    validation dataset in `ds`. The formula is:

    $$
    argmax(p_n(y|x))
    $$

    Args:
        ds (Dataset): pycmtensor dataset
        return_probabilities (bool): if true, returns the probability vector
            instead

    Returns:
        (numpy.ndarray): the predicted choices or the vector of probabilities
    """
    # NOTE(review): the scraped diff interleaved a deleted `__repr__`
    # fragment into this docstring; it has been removed here since it
    # belongs to the old side of the diff, not to this method.
    return BaseModel.predict(self, ds, return_probabilities)
Loading

0 comments on commit e6431a6

Please sign in to comment.