Skip to content

Commit

Permalink
Merge pull request #81 from mwong009/dev
Browse files Browse the repository at this point in the history
add learning rate lower bounds for decaying functions
  • Loading branch information
mwong009 authored Aug 10, 2023
2 parents 791222e + 11dcd92 commit e6431a6
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 92 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
build-install-test:
strategy:
matrix:
os: ["windows-latest", "ubuntu-latest", "macos-latest"]
os: ["windows-latest", "ubuntu-latest"] # , "macos-latest"]
python-version: ["3.11"]
runs-on: ${{ matrix.os }}
steps:
Expand Down Expand Up @@ -94,7 +94,7 @@ jobs:
- name: Upload coverage to codecov.io
if: |
(matrix.os == 'ubuntu-latest') &&
(matrix.os == 'windows-latest') &&
(matrix.python-version == '3.11')
uses: codecov/codecov-action@v2
with:
Expand Down
6 changes: 3 additions & 3 deletions pycmtensor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
)
config.add(
"validation_threshold",
1.003,
1.005,
"The factor of the validation error score to meet in order to register an improvement",
)
config.add("base_learning_rate", 0.01, "The initial learning rate of the model update")
Expand Down Expand Up @@ -66,8 +66,8 @@
"Learning rate scheduler to use for model estimation",
)
config.add("lr_ExpRangeCLR_gamma", 0.5, "Gamma parameter for `ExpRangeCLR`")
config.add("lr_stepLR_factor", 0.5, "Drop step multiplier factor for `stepLR`")
config.add("lr_stepLR_drop_every", 10, "Drop learning rate every n steps for `stepLR`")
config.add("lr_stepLR_factor", 0.95, "Drop step multiplier factor for `stepLR`")
config.add("lr_stepLR_drop_every", 20, "Drop learning rate every n steps for `stepLR`")
config.add("lr_CLR_cycle_steps", 16, "Steps per cycle for `CyclicLR`")
config.add("lr_PolynomialLR_power", 0.999, "Power factor for `PolynomialLR`")
config.add(
Expand Down
24 changes: 5 additions & 19 deletions pycmtensor/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,34 +52,20 @@ def parse(self, expression):
"-",
"+",
"/",
":",
"AdvancedSubtensor",
"Reshape",
"Abs",
"join",
"sum",
"dtype",
"ARange",
":",
"int64",
"axis",
"Softmax",
"None",
"log",
"Assert",
"i0",
"i1",
"i2",
"i3",
"AND",
"OR",
"EQ",
"not",
"Shape",
"Switch",
"BroadcastTo",
"Composite",
"Could",
"ScalarFromTensor",
"Abs",
"Softmax",
"Switch",
"dtype",
]:
stdout = str.replace(stdout, s, " ")
symbols = [s for s in str.split(stdout, " ") if len(s) > 0]
Expand Down
4 changes: 2 additions & 2 deletions pycmtensor/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ def second_order_derivative(cost, params):
for p in params:
if isinstance(p, Beta):
param = p()
if hasattr(p, "output"):
if "output" in dir(p):
param.name = p.name
wrt_params.append(param)

Expand Down Expand Up @@ -377,7 +377,7 @@ def first_order_derivative(cost, params):
for p in params:
if isinstance(p, Beta):
param = p()
if hasattr(p, "output"):
if "output" in dir(p):
param.name = p.name
wrt_params.append(param)

Expand Down
62 changes: 29 additions & 33 deletions pycmtensor/models/MNL.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import aesara
import aesara.tensor as aet
from aesara import pprint

from pycmtensor.expressions import Beta
from pycmtensor.functions import (
Expand Down Expand Up @@ -64,57 +63,36 @@ def __init__(self, ds, params, utility, av=None, **kwargs):
self.params = extract_params(self.cost, params)
self.betas = [p for p in self.params if isinstance(p, Beta)]

# drop unused variables form dataset
# drop unused variables from dataset
drop_unused = drop_unused_variables(self.cost, self.params, ds())
ds.drop(drop_unused)

self.x = ds.x
self.xy = self.x + [self.y]
info(f"choice: {self.y}")
info(f"inputs in {self.name}: {self.x}")

start_time = perf_counter()
self.build_fn()
self.build_cost_fn()
build_time = round(perf_counter() - start_time, 3)

self.results.build_time = time_format(build_time)
info(f"Build time = {self.results.build_time}")

def build_cost_updates_fn(self, updates):
    """Build (or rebuild) the cost function with updates to the model.

    Creates the class function ``self.cost_updates_fn(*inputs, output, lr,
    index)``, an Aesara function that receives the input variable arrays,
    the output array, a learning rate, and a batch index, returns the cost,
    and applies the parameter updates as a side effect.

    Args:
        updates (List[Tuple[TensorSharedVariable, TensorVariable]]): the list
            of tuples pairing each target shared variable with the new value
            of the variable.
    """
    self.cost_updates_fn = aesara.function(
        name="cost_updates",
        inputs=self.x + [self.y, self.learning_rate, self.index],
        outputs=self.cost,
        updates=updates,
    )

def build_fn(self):
"""Method to call to build mathematical operations without updates to the model"""

def build_cost_fn(self):
    """Construct Aesara functions for cost and prediction errors.

    Builds four callable attributes on the model:

    - ``log_likelihood_fn``: log likelihood over the given inputs/outputs.
    - ``choice_probabilities_fn``: probability of each alternative, with
      axes swapped so observations come first.
    - ``choice_predictions_fn``: the predicted (most likely) alternative.
    - ``prediction_error_fn``: error rate of predictions against ``self.y``.
    """
    self.log_likelihood_fn = aesara.function(
        name="log_likelihood", inputs=self.x + [self.y, self.index], outputs=self.ll
    )

    self.choice_probabilities_fn = aesara.function(
        name="choice_probabilities",
        inputs=self.x,
        # swap axes so the output is indexed (observation, alternative)
        outputs=self.p_y_given_x.swapaxes(0, 1),
    )

    self.choice_predictions_fn = aesara.function(
        name="choice_predictions", inputs=self.x, outputs=self.pred
    )

    self.prediction_error_fn = aesara.function(
        name="prediction_error",
        inputs=self.x + [self.y],
        outputs=errors(self.p_y_given_x, self.y),
    )

def build_gh_fn(self):
"""method to construct aesara functions for hessians and gradient vectors"""
self.hessian_fn = aesara.function(
name="hessian",
inputs=self.x + [self.y, self.index],
Expand All @@ -129,8 +107,26 @@ def build_fn(self):
allow_input_downcast=True,
)

def __str__(self):
    """Return the model's name as its string representation."""
    return str(self.name)
def build_cost_updates_fn(self, updates):
    """Build (or rebuild) the cost function with updates to the model.

    Delegates to ``BaseModel.build_cost_updates_fn``, which creates the class
    function ``self.cost_updates_fn(*inputs, output, lr)`` receiving a list
    of input variable arrays, the output array, and a learning rate.

    Args:
        updates (List[Tuple[TensorSharedVariable, TensorVariable]]): the list
            of tuples pairing each target shared variable with the new value
            of the variable.
    """
    BaseModel.build_cost_updates_fn(self, updates)

def predict(self, ds, return_probabilities=False):
    """Predict the output of the most likely alternative given the
    validation dataset in `ds`. The formula is:

    $$
    argmax(p_n(y|x))
    $$

    Args:
        ds (Dataset): pycmtensor dataset
        return_probabilities (bool): if true, returns the probability vector
            instead

    Returns:
        (numpy.ndarray): the predicted choices or the vector of probabilities
    """
    # NOTE(review): the scraped diff interleaved a deleted `__repr__`
    # fragment into this docstring; it has been removed here since it
    # belongs to the old side of the diff, not to this method.
    return BaseModel.predict(self, ds, return_probabilities)
Loading

0 comments on commit e6431a6

Please sign in to comment.