From ce3458bf6224e17d1ccde48e6f3450d3ccb4d7d0 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Thu, 2 Nov 2023 10:45:02 -0700 Subject: [PATCH 01/11] outputs --- idaes/core/base/flowsheet_model.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/idaes/core/base/flowsheet_model.py b/idaes/core/base/flowsheet_model.py index d0775ac2b7..f8e92c104b 100644 --- a/idaes/core/base/flowsheet_model.py +++ b/idaes/core/base/flowsheet_model.py @@ -74,10 +74,7 @@ def __init__(self): self.visualize = self._visualize_null self.installed = False else: - # FIXME the explicit submodule import is needed because the idaes_ui doesn't import its fv submodule - # otherwise, you get "AttributeError: module 'idaes_ui' has no 'fv' attribute" - import idaes_ui.fv - + import idaes_ui self.visualize = idaes_ui.fv.visualize self.installed = True From 5aa521dafed02efb75e670881ed22cdd2fee8d51 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Mon, 3 Jun 2024 12:18:08 -0700 Subject: [PATCH 02/11] changes --- idaes/config.py | 2 + idaes/core/surrogate/pysmo/kriging.py | 48 +++++++++---------- .../surrogate/pysmo/tests/test_kriging.py | 25 ++++++---- 3 files changed, 39 insertions(+), 36 deletions(-) diff --git a/idaes/config.py b/idaes/config.py index 584a90cd66..e591d232fd 100644 --- a/idaes/config.py +++ b/idaes/config.py @@ -496,6 +496,7 @@ def _new_idaes_config_block(): "properties", "reactions", "ui", + "surrogate", ] ), domain=set, @@ -515,6 +516,7 @@ def _new_idaes_config_block(): "control_volume", "properties", "reactions", + "surrogate", ] ), domain=set, diff --git a/idaes/core/surrogate/pysmo/kriging.py b/idaes/core/surrogate/pysmo/kriging.py index ad04fffb0c..9babf125a9 100644 --- a/idaes/core/surrogate/pysmo/kriging.py +++ b/idaes/core/surrogate/pysmo/kriging.py @@ -31,6 +31,14 @@ # Imports from IDAES namespace from idaes.core.surrogate.pysmo.sampling import FeatureScaling as fs +from idaes.logger import getIdaesLogger + +# Logging +_log = getIdaesLogger(__name__, tag="surrogate") + + +def set_log_level(level): + _log.setLevel(level) class MyBounds(object): @@ -145,11 +153,7 @@ def __init__( if ( os.path.exists(fname) and overwrite is True ): # Explicit overwrite done by user - print( - "Warning:", - fname, - "already exists; previous file will be overwritten.\n", - ) + _log.warning(f"'{fname}' already exists; previous file will be overwritten") self.filename = fname elif os.path.exists(fname) and overwrite is False: # User is not overwriting self.filename = ( @@ -158,12 +162,8 @@ def __init__( + pd.Timestamp.today().strftime("%m-%d-%y_%H%M%S") + ".pickle" ) - print( - "Warning:", - fname, - 'already exists; results will be saved to "', - self.filename, - '".\n', + _log.warning( + f"'{fname}' already exists; results will be saved to {self.filename}" ) # self.filename = 'solution.pickle' elif os.path.exists(fname) is False: @@ -314,9 +314,9 @@ def kriging_sd(cov_inv, y_mu, ns): # sigma_sq = np.matmul(y_mu.transpose(), np.matmul(cov_inv, y_mu)) / ns return sigma_sq - @staticmethod - def print_fun(x, f, accepted): - print("at minimum %.4f accepted %d" % (f, int(accepted))) + # @staticmethod + # def print_fun(x, f, accepted): + # print("at minimum %.4f accepted %d" % (f, int(accepted))) def objective_function(self, var_vector, x, y, p): """ @@ -421,7 +421,7 @@ def parameter_optimization(self, p): bounds = tuple(bounds) if self.num_grads: - print("Optimizing kriging parameters using L-BFGS-B algorithm...") + _log.info("Optimizing kriging parameters using L-BFGS-B algorithm...") other_args = 
(self.x_data_scaled, self.y_data, p) # opt_results = opt.minimize(self.objective_function, initial_value, args=other_args, method='L-BFGS-B', jac=self.numerical_gradient, bounds=bounds, options={'gtol': 1e-7}) #, 'disp': True}) opt_results1 = opt.minimize( @@ -447,7 +447,7 @@ def parameter_optimization(self, p): else: opt_results = opt_results2 else: - print("Optimizing Kriging parameters using Basinhopping algorithm...") + _log.info("Optimizing Kriging parameters using Basinhopping algorithm...") other_args = { "args": (self.x_data_scaled, self.y_data, p), "bounds": bounds, @@ -466,7 +466,8 @@ def parameter_optimization(self, p): def optimal_parameter_evaluation(self, var_vector, p): """ - The optimal_parameter_evaluation method evaluates the values of all the parameters of the final Kriging model. + The optimal_parameter_evaluation method evaluates the values of all the parameters + of the final Kriging model. For an input set of Kriging parameters var_vector and p, it: (1) Generates the covariance matrix by calling covariance_matrix_generator @@ -501,13 +502,8 @@ def optimal_parameter_evaluation(self, var_vector, p): mean = self.kriging_mean(cov_inv, self.y_data) y_mu = self.y_mu_calculation(self.y_data, mean) variance = self.kriging_sd(cov_inv, y_mu, ns) - print( - "\nFinal results\n================\nTheta:", - theta, - "\nMean:", - mean, - "\nRegularization parameter:", - reg_param, + _log.info( + f"results: theta={theta} mean={mean} regularization-parameter={reg_param}" ) return theta, reg_param, mean, variance, cov_mat, cov_inv, y_mu @@ -738,9 +734,9 @@ def pickle_save(self, solutions): try: filehandler = open(self.filename, "wb") pickle.dump(solutions, filehandler) - print("\nResults saved in ", str(self.filename)) + _log.info(f"results saved in '{self.filename}'") except: - raise IOError("File could not be saved.") + raise IOError(f"File '{self.filename}' could not be saved.") @staticmethod def pickle_load(solution_file): diff --git a/idaes/core/surrogate/pysmo/tests/test_kriging.py b/idaes/core/surrogate/pysmo/tests/test_kriging.py index 87fcd4f17e..ed33dc93d2 100644 --- a/idaes/core/surrogate/pysmo/tests/test_kriging.py +++ b/idaes/core/surrogate/pysmo/tests/test_kriging.py @@ -16,11 +16,16 @@ from unittest.mock import patch sys.path.append(os.path.abspath("..")) # current folder is ~/tests +from idaes.core.surrogate.pysmo import kriging from idaes.core.surrogate.pysmo.kriging import KrigingModel +from idaes import logger as idaes_logger import numpy as np import pandas as pd import pytest +# Turn down the logging during the test +kriging.set_log_level(idaes_logger.ERROR) + class TestKrigingModel: y = np.array( @@ -219,16 +224,16 @@ def test_kriging_sd(self, array_type): sigma_sq_exp = 272.84104637 assert np.round(sigma_sq_exp, 5) == np.round(sigma_sq[0][0], 5) - @pytest.mark.unit - @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test_print_fun(self, array_type): - input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array[0:3], regularization=True) - capturedOutput = io.StringIO() - sys.stdout = capturedOutput - KrigingClass.print_fun(1, 2, 3.7) - sys.stdout = sys.__stdout__ - assert "at minimum 2.0000 accepted 3\n" == capturedOutput.getvalue() + # @pytest.mark.unit + # @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + # def test_print_fun(self, array_type): + # input_array = array_type(self.training_data) + # KrigingClass = KrigingModel(input_array[0:3], regularization=True) + # capturedOutput = io.StringIO() 
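
The stdout-capture pattern in the test being commented out above is replaced, later in this series, by assertions on log records. A minimal, self-contained pytest sketch of that approach; the logger name and message here are illustrative and not part of this diff:

    import logging

    def test_warning_is_logged(caplog):
        log = logging.getLogger("pysmo.demo")  # illustrative logger name
        with caplog.at_level(logging.WARNING, logger="pysmo.demo"):
            log.warning("solution.pickle already exists")
        # caplog collects the emitted records for inspection
        assert any("already exists" in rec.getMessage() for rec in caplog.records)
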
+ # sys.stdout = capturedOutput + # KrigingClass.print_fun(1, 2, 3.7) + # sys.stdout = sys.__stdout__ + # assert "at minimum 2.0000 accepted 3\n" == capturedOutput.getvalue() @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) From 7fe182eaf1bbf627a9135365f3dc2da4047ae319 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Mon, 3 Jun 2024 12:49:55 -0700 Subject: [PATCH 03/11] more changes --- idaes/core/surrogate/pysmo/kriging.py | 1 + .../surrogate/pysmo/polynomial_regression.py | 169 +++++++++--------- .../pysmo/tests/test_polynomial_regression.py | 5 + 3 files changed, 94 insertions(+), 81 deletions(-) diff --git a/idaes/core/surrogate/pysmo/kriging.py b/idaes/core/surrogate/pysmo/kriging.py index 9babf125a9..fa20bc6875 100644 --- a/idaes/core/surrogate/pysmo/kriging.py +++ b/idaes/core/surrogate/pysmo/kriging.py @@ -38,6 +38,7 @@ def set_log_level(level): + """Set logging level for the default logger in this module.""" _log.setLevel(level) diff --git a/idaes/core/surrogate/pysmo/polynomial_regression.py b/idaes/core/surrogate/pysmo/polynomial_regression.py index 7d41719015..0fdca66b28 100644 --- a/idaes/core/surrogate/pysmo/polynomial_regression.py +++ b/idaes/core/surrogate/pysmo/polynomial_regression.py @@ -13,6 +13,26 @@ # TODO: Missing doc strings # pylint: disable=missing-module-docstring # pylint: disable=missing-function-docstring +""" +The purpose of this file is to perform polynomial regression in Pyomo. +This will be done in two stages. First, a sampling plan will +be used to select samples for generating a surrogate model. +In the second stage, the surrogate model is constructed by fitting to +different order polynomials. Long term, an iterative adaptive sampling +approach will be incorporated for model improvement. +Cross-validation is used to select the best model. + + +FeatureScaling: +Simple script for scaling and un-scaling the input data + +Polynomial Regression: +Three approaches are implemented for evaluating polynomial coefficients - +a. Moore-Penrose maximum likelihood method (Forrester et al.) +b. Optimization using the BFGS algorithm. +c. Optimization with Pyomo +The Pyomo optimization approach is enabled as the default at this time. +""" import os.path import warnings @@ -42,36 +62,22 @@ # Imports from IDAES namespace from idaes.core.surrogate.pysmo.utils import NumpyEvaluator - +from idaes.logger import getIdaesLogger __author__ = "Oluwamayowa Amusat" -""" -The purpose of this file is to perform polynomial regression in Pyomo. -This will be done in two stages. First, a sampling plan will -be used to select samples for generating a surrogate model. -In the second stage, the surrogate model is constructed by fitting to -different order polynomials. Long term, an iterative adaptive sampling -approach will be incorporated for model improvement. -Cross-validation is used to select the best model. - +# Logging +_log = getIdaesLogger(__name__, tag="surrogate") -FeatureScaling: -Simple script for scaling and un-scaling the input data -Polynomial Regression: -Three approaches are implemented for evaluating polynomial coefficients - -a. Moore-Penrose maximum likelihood method (Forrester et al.) -b. Optimization using the BFGS algorithm. -c. Optimization with Pyomo -The Pyomo optimization approach is enabled as the default at this time. -""" +def set_log_level(level): + """Set logging level for the default logger in this module.""" + _log.setLevel(level) class FeatureScaling: - """ - - A class for scaling and unscaling input and output data. 
The class contains two main methods: ``data_scaling`` and ``data_unscaling`` + """Scale and unscale input and output data. + The class contains two main methods: ``data_scaling`` and ``data_unscaling`` """ def __init__(self): @@ -186,6 +192,8 @@ class PolynomialRegression: """ + MAX_POLY = 10 # maximum polynomial order + def __init__( self, original_data_input, @@ -250,9 +258,10 @@ def __init__( - When the number of cross-validations is too high, i.e. number_of_crossvalidations > 10 """ - print( - "\n===========================Polynomial Regression===============================================\n" - ) + # print( + # "\n===========================Polynomial Regression===============================================\n" + # ) + _log.info("Polynomial Regression (begin)") # Checks if fname is provided or exists in the path if not isinstance(overwrite, bool): # PYLINT-TODO @@ -274,11 +283,7 @@ def __init__( if ( os.path.exists(fname) and overwrite is True ): # Explicit overwrite done by user - print( - "Warning:", - fname, - "already exists; previous file will be overwritten.\n", - ) + _log.warn(f"Warning: '{fname}' exists, previous file will be overwritten") self.filename = fname elif os.path.exists(fname) and overwrite is False: # User is not overwriting self.filename = ( @@ -287,12 +292,9 @@ def __init__( + pd.Timestamp.today().strftime("%m-%d-%y_%H%M%S") + ".pickle" ) - print( - "Warning:", - fname, - 'already exists; results will be saved to "', - self.filename, - '".\n', + _log.warn( + f"Warning: '{fname}' exists, " + f"results will be saved to {self.filename}" ) # self.filename = 'solution.pickle' elif os.path.exists(fname) is False: @@ -342,25 +344,31 @@ def __init__( self.original_data = original_data self.regression_data = regression_data - if number_of_crossvalidations is None: - print("The number of cross-validation cases (3) is used.") - number_of_crossvalidations = 3 - elif number_of_crossvalidations > 10: - warnings.warn( - "The number of cross-validations entered is large. The simulation may take a while to run" + num_cross = number_of_crossvalidations + if num_cross is None: + num_cross = 3 + _log.info(f"Use default number of cross-validations: {num_cross}") + elif num_cross > 10: + # warnings.warn( + _log.warn( + f"The number of cross-validations ({num_cross}) is large. " + f"The simulation may take a while to run" ) - self.number_of_crossvalidations = number_of_crossvalidations + self.number_of_crossvalidations = num_cross - if not isinstance(maximum_polynomial_order, int): + max_poly = maximum_polynomial_order + if not isinstance(max_poly, int): # PYLINT-TODO # pylint: disable-next=broad-exception-raised raise Exception("Maximum polynomial order must be an integer") - elif maximum_polynomial_order > 10: - warnings.warn( - "The maximum allowed polynomial order is 10. Value has been adjusted to 10." 
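
The replacement branch added just below folds the hard-coded limit into the MAX_POLY class constant and logs a warning instead of calling warnings.warn. The same validate-and-clamp pattern as a self-contained sketch, with stdlib logging standing in for getIdaesLogger and the message wording expanded slightly:

    import logging

    _log = logging.getLogger(__name__)
    MAX_POLY = 10  # largest polynomial order the fit will attempt

    def clamp_polynomial_order(order: int) -> int:
        """Validate a requested polynomial order, clamping it to MAX_POLY."""
        if not isinstance(order, int):
            raise TypeError("Maximum polynomial order must be an integer")
        if order > MAX_POLY:
            _log.warning(
                "Maximum polynomial order (%s) reduced to maximum allowed (%s)",
                order,
                MAX_POLY,
            )
            order = MAX_POLY
        return order
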
+ elif max_poly > self.MAX_POLY: + # warnings.warn( + _log.warn( + f"Maximum polynomial order value ({max_poly})" + f"reduced maximum allowed ({self.MAX_POLY})" ) - maximum_polynomial_order = 10 - self.max_polynomial_order = maximum_polynomial_order + max_poly = 10 + self.max_polynomial_order = max_poly self.number_of_x_vars = regression_data.shape[1] - 1 @@ -435,7 +443,7 @@ def __init__( if solution_method is None: solution_method = "pyomo" self.solution_method = solution_method - print("Default parameter estimation method is used.") + _log.warning("Using default parameter estimation method") elif not isinstance(solution_method, str): # PYLINT-TODO # pylint: disable-next=broad-exception-raised @@ -453,7 +461,7 @@ def __init__( raise Exception( 'Invalid parameter estimation method entered. Select one of maximum likelihood (solution_method="mle"), Pyomo optimization (solution_method="pyomo") or BFGS (solution_method="bfgs") methods. ' ) - print("Parameter estimation method: ", self.solution_method, "\n") + _log.info(f"Parameter estimation method: {self.solution_method}") if multinomials is None: self.multinomials = 1 @@ -1179,7 +1187,8 @@ def polynomial_regression_fitting(self, additional_regression_features=None): See information on ResultReport class for details on contents. """ - # Parameters that represent the best solution found at each iteration based on the cross-validation error + # Parameters that represent the best solution found at each iteration, + # based on the cross-validation error best_error = 1e20 train_error_fit = 1e20 phi_best = 0 @@ -1188,15 +1197,13 @@ def polynomial_regression_fitting(self, additional_regression_features=None): if (additional_regression_features is None) or ( len(additional_regression_features) == 0 ): - print( - "max_fraction_training_samples set at ", - self.max_fraction_training_samples, + _log.info( + f"max_fraction_training_samples=" + f"{self.max_fraction_training_samples}, " + f"number of adaptive samples (no_adaptive_samples)=" + f"{self.no_adaptive_samples}, " + f"maximum number of iterations (Max_iter)={self.max_iter}" ) - print( - "Number of adaptive samples (no_adaptive_samples) set at ", - self.no_adaptive_samples, - ) - print("Maximum number of iterations (Max_iter) set at: ", self.max_iter) training_data, cross_val_data = self.training_test_data_creation() for poly_order in range(1, self.max_polynomial_order + 1): @@ -1211,10 +1218,9 @@ def polynomial_regression_fitting(self, additional_regression_features=None): phi_best = phi order_best = poly_order train_error_fit = train_error - print( - "\nInitial surrogate model is of order", - order_best, - " with a cross-val error of %4f" % best_error, + _log.info( + f"Initial surrogate model is of order {order_best}" + f"with a cross-validation error of {best_error:.4f}" ) # Next, Calculate and report errors. ( @@ -1224,16 +1230,17 @@ def polynomial_regression_fitting(self, additional_regression_features=None): r_square, r_square_adj, ) = self.surrogate_performance(phi_best, order_best) - print( - "Initial Regression Model Performance:\nOrder: ", - order_best, - " / MAE: %4f" % mae_error, - " / MSE: %4f" % mse_error, - " / R^2: %4f" % r_square, - " / Adjusted R^2: %4f" % r_square_adj, + _log.info( + "Initial Regression Model Performance:\n" + f"Order: {order_best} " + f" / MAE: {mae_error:.4f}" + f" / MSE: {mse_error:.4f}" + f" / R^2: {r_square:.4f}" + f" / Adjusted R^2: {r_square_adj:.4f}" ) - # Parameters that retain the previous best solutions. 
They are compared to the best solution at each iteration based on the R-square coefficient. + # Parameters that retain the previous best solutions. They are compared + # to the best solution at each iteration based on the R-square coefficient. ( order_opt, train_error_opt, @@ -1274,11 +1281,12 @@ def polynomial_regression_fitting(self, additional_regression_features=None): < self.original_data.shape[0] ) ): - print("\n-------------------------------------------------") - print("\nIteration ", iteration_number) + # print("\n-------------------------------------------------") + _log.debug(f"<< iteration={iteration_number} >>") best_error = 1e20 - # Select n_adaptive_samples worst fitting points to be added to the dataset used in the previous evaluation. + # Select n_adaptive_samples worst fitting points to be added to the + # dataset used in the previous evaluation. scv_input_data = sorted_comparison_vector[:, :-2] sorted_comparison_vector_unique = scv_input_data[ np.all( @@ -1289,7 +1297,7 @@ def polynomial_regression_fitting(self, additional_regression_features=None): ) ] adaptive_samples = sorted_comparison_vector_unique[ - # PYLINT-WHY: pylint considers self.no_adaptive_samples to be None here + # pylint considers self.no_adaptive_samples to be None here # pylint: disable=invalid-unary-operand-type -self.no_adaptive_samples :, :, @@ -1301,11 +1309,10 @@ def polynomial_regression_fitting(self, additional_regression_features=None): self.number_of_samples = self.regression_data.shape[ 0 ] # Never forget to update - print( - "\n", - self.no_adaptive_samples, - " additional points added to training data. New number of training samples: ", - self.regression_data.shape[0], + _log.debug( + f"{self.no_adaptive_samples} " + f" additional points added to training data. 
" + f"New number of training samples: {self.regression_data.shape[0]}" ) training_data, cross_val_data = self.training_test_data_creation() diff --git a/idaes/core/surrogate/pysmo/tests/test_polynomial_regression.py b/idaes/core/surrogate/pysmo/tests/test_polynomial_regression.py index b02733377f..b00f3b362a 100644 --- a/idaes/core/surrogate/pysmo/tests/test_polynomial_regression.py +++ b/idaes/core/surrogate/pysmo/tests/test_polynomial_regression.py @@ -13,6 +13,7 @@ import sys, os from unittest.mock import patch +from idaes.core.surrogate.pysmo import polynomial_regression from idaes.core.surrogate.pysmo.polynomial_regression import ( PolynomialRegression, FeatureScaling, @@ -20,6 +21,10 @@ import numpy as np import pandas as pd import pytest +from idaes import logger as idaes_logger + +# Turn down the logging during the test +polynomial_regression.set_log_level(idaes_logger.ERROR) class TestFeatureScaling: From aa68cd87b2d05abea3d437acd3e9a6622555dad1 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Mon, 3 Jun 2024 14:18:20 -0700 Subject: [PATCH 04/11] more changes - breaking --- .../surrogate/pysmo/polynomial_regression.py | 150 ++++++++++-------- .../pysmo/tests/test_polynomial_regression.py | 19 ++- 2 files changed, 94 insertions(+), 75 deletions(-) diff --git a/idaes/core/surrogate/pysmo/polynomial_regression.py b/idaes/core/surrogate/pysmo/polynomial_regression.py index 0fdca66b28..fd2e219a1f 100644 --- a/idaes/core/surrogate/pysmo/polynomial_regression.py +++ b/idaes/core/surrogate/pysmo/polynomial_regression.py @@ -35,7 +35,6 @@ """ import os.path -import warnings import pickle # Imports from third parties @@ -283,7 +282,9 @@ def __init__( if ( os.path.exists(fname) and overwrite is True ): # Explicit overwrite done by user - _log.warn(f"Warning: '{fname}' exists, previous file will be overwritten") + _log.warning( + f"Warning: '{fname}' exists, previous file will be overwritten" + ) self.filename = fname elif os.path.exists(fname) and overwrite is False: # User is not overwriting self.filename = ( @@ -292,7 +293,7 @@ def __init__( + pd.Timestamp.today().strftime("%m-%d-%y_%H%M%S") + ".pickle" ) - _log.warn( + _log.warning( f"Warning: '{fname}' exists, " f"results will be saved to {self.filename}" ) @@ -349,11 +350,12 @@ def __init__( num_cross = 3 _log.info(f"Use default number of cross-validations: {num_cross}") elif num_cross > 10: - # warnings.warn( - _log.warn( + msg = ( f"The number of cross-validations ({num_cross}) is large. 
" f"The simulation may take a while to run" ) + # warnings.warn(msg) + _log.warning(msg) self.number_of_crossvalidations = num_cross max_poly = maximum_polynomial_order @@ -362,18 +364,19 @@ def __init__( # pylint: disable-next=broad-exception-raised raise Exception("Maximum polynomial order must be an integer") elif max_poly > self.MAX_POLY: - # warnings.warn( - _log.warn( + msg = ( f"Maximum polynomial order value ({max_poly})" f"reduced maximum allowed ({self.MAX_POLY})" ) + # warnings.warn(msg) + _log.warning(msg) max_poly = 10 self.max_polynomial_order = max_poly self.number_of_x_vars = regression_data.shape[1] - 1 if training_split is None: - print("The default training/cross-validation split of 0.75 is used.") + _log.warning("The default training/cross-validation split of 0.75 is used.") training_split = 0.75 elif training_split >= 1 or training_split <= 0: # PYLINT-TODO @@ -405,7 +408,7 @@ def __init__( regression_data.shape[0] == original_data.shape[0] or no_adaptive_samples == 0 ): - print("No iterations will be run.") + _log.warning("No iterations will be run.") max_iter = 0 self.max_iter = max_iter @@ -492,6 +495,16 @@ def __init__( self.extra_terms_feature_vector = None self.fit_status = None + @staticmethod + def _format_model_perf(order, mae_error, mse_error, r_square, r_square_adj=None): + s = ( + f"Order: {order} / MAE: {mae_error:.4f}" + f" / MSE: {mse_error:.4f} / R^2: {r_square:.4f}" + ) + if r_square_adj is not None: + s += f" / Adjusted R^2: {r_square_adj:.4f}" + return s + def training_test_data_creation(self, additional_features=None): """ @@ -1008,7 +1021,8 @@ def results_generation(self, beta, order): print("\n------------------------------------------------------------") print("The final coefficients of the regression terms are: \n") print("k |", beta[0, 0]) - results_df = pd.concat([results_df, pd.Series({"k": beta[0, 0]})], axis=0) + results_df = pd.concat([#results_df, + pd.Series({"k": beta[0, 0]})], axis=0) if self.multinomials == 1: for i in range(1, order + 1): for j in range(1, self.number_of_x_vars + 1): @@ -1232,11 +1246,9 @@ def polynomial_regression_fitting(self, additional_regression_features=None): ) = self.surrogate_performance(phi_best, order_best) _log.info( "Initial Regression Model Performance:\n" - f"Order: {order_best} " - f" / MAE: {mae_error:.4f}" - f" / MSE: {mse_error:.4f}" - f" / R^2: {r_square:.4f}" - f" / Adjusted R^2: {r_square_adj:.4f}" + + self._format_model_perf( + order_best, mae_error, mse_error, r_square, r_square_adj + ) ) # Parameters that retain the previous best solutions. 
They are compared @@ -1329,10 +1341,9 @@ def polynomial_regression_fitting(self, additional_regression_features=None): phi_best = phi order_best = poly_order train_error_fit = train_error - print( - "\nThe best regression model is of order", - order_best, - " with a cross-val error of %4f" % best_error, + _log.info( + f"The best regression model is of order {order_best}" + f"with a cross-val error of {best_error:.4f}" ) ( @@ -1342,18 +1353,16 @@ def polynomial_regression_fitting(self, additional_regression_features=None): r_square, r_square_adj, ) = self.surrogate_performance(phi_best, order_best) - print( - "Regression performance on full data in iteration", - iteration_number, - "\nOrder: ", - order_best, - " / MAE: %4f" % mae_error, - " / MSE: %4f" % mse_error, - " / R_sq: %4f" % r_square, - " / Adjusted R^2: %4f" % r_square_adj, + _log.info( + f"Regression performance on full data in iteration" + f"{iteration_number} " + + self._format_model_perf( + order_best, mae_error, mse_error, r_square, r_square_adj + ) ) - # Determine if solution is improved. If yes, update solution. if no, retain previous best. + # Determine if solution is improved. If yes, update solution. + # If no, retain previous best. if r_square_adj > r_square_adj_opt: ( phi_opt, @@ -1397,24 +1406,21 @@ def polynomial_regression_fitting(self, additional_regression_features=None): # Round phi to 2.d.p and print results to screen beta_vector = np.round(phi_opt, 6) if r_square_adj_opt < 0.95: - print("\nPolynomial regression performs poorly for this dataset.") + _log.warning("Polynomial regression performs poorly for this dataset") else: - print( - "\nPolynomial regression generates a good surrogate model for the input data." + _log.info( + "Polynomial regression generates a good surrogate model for the input data" ) if iteration_number > 1: _, _, _, _ = self.error_plotting(vector_of_results) - print( - "\n-------------------------------------------------\n-------------------------------------------------" - ) - print( - "Best solution found: ", - "\nOrder: ", - order_opt, - " / MAE: %4f" % mae_error_opt, - " / MSE: %4f" % mse_error_opt, - " / R_sq: %4f" % r_square_opt, - " / Adjusted R^2: %4f" % r_square_adj_opt, + # print( + # "\n-------------------------------------------------\n-------------------------------------------------" + # ) + _log.info( + "Best solution found:\n" + + self._format_model_perf( + order_best, mae_error, mse_error, r_square, r_square_adj + ) ) dataframe_coeffs = self.results_generation(beta_vector, order_opt) @@ -1456,9 +1462,9 @@ def polynomial_regression_fitting(self, additional_regression_features=None): if r_square_opt > 0.95: self.fit_status = "ok" else: - warnings.warn( - "Polynomial regression generates poor fit for the dataset" - ) + msg = "Polynomial regression generates poor fit for the dataset" + _log.warning(msg) + # warnings.warn(msg) self.fit_status = "poor" self.pickle_save({"model": self}) @@ -1489,13 +1495,13 @@ def polynomial_regression_fitting(self, additional_regression_features=None): phi_best = phi order_best = poly_order train_error_fit = train_error - print( - "\nBest surrogate model is of order", - order_best, - " with a cross-val S.S. Error of %4f" % best_error, + _log.info( + f"Best surrogate model is of order {order_best} " + f"with a cross-val S.S. Error of {best_error:.4f}" ) - # KEY: Modification of self variable outside initialization. Required to make @surrogate_performance work here. + # KEY: Modification of self variable outside initialization. 
+ # Required to make @surrogate_performance work here. self.original_data = self.regression_data _, mae_error, mse_error, r_square, _ = self.surrogate_performance( phi_best, order_best, additional_features_array @@ -1508,8 +1514,8 @@ def polynomial_regression_fitting(self, additional_regression_features=None): dataframe_coeffs = self.results_generation(beta_vector, order_best) extra_terms_coeffs = pd.Series(dtype="float64") - print( - "\nThe coefficients of the extra terms in additional_regression_features are:\n" + _log.debug( + "Coefficients of the extra terms in additional_regression_features:" ) for af in range(number_additional_features, 0, -1): print( @@ -1533,11 +1539,8 @@ def polynomial_regression_fitting(self, additional_regression_features=None): # Print errors print( - "\nRegression model performance on training data:\nOrder: ", - order_best, - " / MAE: %4f" % mae_error, - " / MSE: %4f" % mse_error, - " / R^2: %4f" % r_square, + "\nRegression model performance on training data:\n" + + self._format_model_perf(order_best, mae_error, mse_error, r_square) ) extra_terms_feature_vector = list( @@ -1558,9 +1561,7 @@ def polynomial_regression_fitting(self, additional_regression_features=None): if r_square > 0.95: self.fit_status = "ok" else: - warnings.warn( - "Polynomial regression generates poor fit for the dataset" - ) + _log.warning("Polynomial regression generates poor fit for the dataset") self.fit_status = "poor" self.pickle_save({"model": self}) @@ -1570,10 +1571,12 @@ def polynomial_regression_fitting(self, additional_regression_features=None): def get_feature_vector(self): """ - The ``get_feature_vector`` method generates the list of regression features from the column headers of the input dataset. + The ``get_feature_vector`` method generates the list of regression features + from the column headers of the input dataset. Returns: - Pyomo IndexedParam : An indexed parameter list of the variables supplied in the original data + Pyomo IndexedParam : An indexed parameter list of the variables + supplied in the original data **Example:** @@ -1601,17 +1604,20 @@ def get_feature_vector(self): def set_additional_terms(self, term_list): """ - ``set_additional_terms`` accepts additional user-defined features for consideration during regression. + ``set_additional_terms`` accepts additional user-defined features + for consideration during regression. Args: - term_list (list) : List of additional terms to be considered as regression features. Each term in the list must be a Pyomo-supported intrinsic function. + term_list (list) : List of additional terms to be considered as regression features. + Each term in the list must be a Pyomo-supported intrinsic function. **Example:** .. code-block:: python - # To add the sine and cosine of a variable with header 'X1' in the dataset as additional regression features: + # To add the sine and cosine of a variable with header 'X1' in the dataset + # as additional regression features: >>> xy_data = pd.DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], orient='index', columns=['X1', 'X2', 'Y']) >>> A = PolynomialRegression(xy_data, xy_data, maximum_polynomial_order=5) >>> p = A.get_feature_vector() @@ -1625,11 +1631,15 @@ def training(self): """ The ``training`` method trains a polynomial model to an input dataset. - It calls the core method which is called in the PolynomialRegression class (polynomial_regression_fitting). - It accepts no user input, inheriting the information passed in class initialization. 
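
For orientation, a hypothetical end-to-end use of the API documented here; the dataset and file name are made up, and an installed idaes providing this module is assumed:

    import numpy as np
    import pandas as pd
    from idaes.core.surrogate.pysmo.polynomial_regression import PolynomialRegression

    # small synthetic dataset: y = 2*x1 + 3*x2
    xy = pd.DataFrame({"x1": np.linspace(0.0, 1.0, 20), "x2": np.linspace(1.0, 2.0, 20)})
    xy["y"] = 2.0 * xy["x1"] + 3.0 * xy["x2"]

    model = PolynomialRegression(
        xy, xy, maximum_polynomial_order=2, fname="demo_poly.pickle", overwrite=True
    )
    p = model.get_feature_vector()  # Pyomo IndexedParam over the input columns
    results = model.training()  # fits, cross-validates, and pickles the result
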
+ It calls the core method which is called in the PolynomialRegression class + (polynomial_regression_fitting). + + It accepts no user input, inheriting the information passed in + class initialization. Returns: - tuple : Python Object (**results**) containing the results of the polynomial regression process including: + tuple : Python Object (**results**) containing the results of the + polynomial regression process, including: - the polynomial order (**self.final_polynomial_order**) - polynomial coefficients (**self.optimal_weights_array**), and - MAE and MSE errors as well as the :math:`R^{2}` (**results.errors**). @@ -1724,7 +1734,7 @@ def pickle_save(self, solutions): try: filehandler = open(self.filename, "wb") pickle.dump(solutions, filehandler) - print("\nResults saved in ", str(self.filename)) + _log.info(f"Results saved in: {self.filename}") except: raise IOError("File could not be saved.") diff --git a/idaes/core/surrogate/pysmo/tests/test_polynomial_regression.py b/idaes/core/surrogate/pysmo/tests/test_polynomial_regression.py index b00f3b362a..715332399d 100644 --- a/idaes/core/surrogate/pysmo/tests/test_polynomial_regression.py +++ b/idaes/core/surrogate/pysmo/tests/test_polynomial_regression.py @@ -305,10 +305,11 @@ def test__init__08(self, array_type1, array_type2): @pytest.mark.unit @pytest.mark.parametrize("array_type1", [np.array, pd.DataFrame]) @pytest.mark.parametrize("array_type2", [np.array, pd.DataFrame]) - def test__init__09(self, array_type1, array_type2): + def test__init__09(self, array_type1, array_type2, caplog): original_data_input = array_type1(self.test_data) regression_data_input = array_type2(self.sample_points) - with pytest.warns(Warning): + #with pytest.warns(Warning): + with caplog.at_level(idaes_logger.WARNING): PolyClass = PolynomialRegression( original_data_input, regression_data_input, @@ -318,6 +319,14 @@ def test__init__09(self, array_type1, array_type2): assert ( PolyClass.number_of_crossvalidations == 11 ) # Default number of cross-validations + got_warning = False + print("@@ printing records") + for record in caplog.records: + print(f"@@ record={record}") + if record.levelname == "WARNING" and "cross-validations" in record.message: + got_warning = True + break + assert got_warning @pytest.mark.unit @pytest.mark.parametrize("array_type1", [np.array, pd.DataFrame]) @@ -1652,7 +1661,7 @@ def test_results_generation_01(self, array_type1, array_type2): row_list = np.array([["k"], ["(x_1)^1"], ["(x_2)^1"]]) expected_df = pd.concat( [ - expected_df, + #expected_df, pd.Series( { row_list[0, 0]: beta[0, 0], @@ -1694,7 +1703,7 @@ def test_results_generation_02(self, array_type1, array_type2): ) expected_df = pd.concat( [ - expected_df, + #expected_df, -- empty pd.Series( { row_list[0, 0]: beta[0, 0], @@ -1732,7 +1741,7 @@ def test_results_generation_03(self, array_type1, array_type2): ) expected_df = pd.concat( [ - expected_df, + #expected_df, --empty pd.Series( { row_list[0, 0]: beta[0, 0], From e629a5a142a66b517369a404f024269bc4d26350 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Mon, 10 Jun 2024 16:48:32 -0700 Subject: [PATCH 05/11] cleanup --- idaes/core/surrogate/pysmo/kriging.py | 4 +- .../surrogate/pysmo/tests/test_kriging.py | 226 +++++++++--------- 2 files changed, 116 insertions(+), 114 deletions(-) diff --git a/idaes/core/surrogate/pysmo/kriging.py b/idaes/core/surrogate/pysmo/kriging.py index fa20bc6875..1cb3925237 100644 --- a/idaes/core/surrogate/pysmo/kriging.py +++ b/idaes/core/surrogate/pysmo/kriging.py @@ -592,7 +592,7 @@ def 
predict_output(self, x_pred): ) return y_pred - def training(self): + def training(self) -> "KrigingModel": r""" Main function for Kriging training. @@ -603,7 +603,7 @@ def training(self): (4) A results object is generated by calling the ``ResultsReport`` class. Returns: - tuple : self object (**results**) containing the all information about the best Kriging model obtained, including: + KrigingModel : self object (**results**) containing the all information about the best Kriging model obtained, including: - the Kriging model hyperparameters (**results.optimal_weights**), - when relevant, the optimal regularization parameter found :math:`\lambda` (**results.regularization_parameter**), - the Kriging mean (**results.optimal_mean**), diff --git a/idaes/core/surrogate/pysmo/tests/test_kriging.py b/idaes/core/surrogate/pysmo/tests/test_kriging.py index ed33dc93d2..d682fcada9 100644 --- a/idaes/core/surrogate/pysmo/tests/test_kriging.py +++ b/idaes/core/surrogate/pysmo/tests/test_kriging.py @@ -54,84 +54,84 @@ class TestKrigingModel: @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__01(self, array_type): input_array = array_type(self.test_data) - KrigingClass = KrigingModel(input_array) - assert KrigingClass.num_grads == True - assert KrigingClass.regularization == True + kriging_class = KrigingModel(input_array) + assert kriging_class.num_grads is True + assert kriging_class.regularization is True @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__02(self, array_type): input_array = array_type(self.test_data) with pytest.raises(ValueError): - KrigingClass = KrigingModel(input_array) + KrigingModel(input_array) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__03(self, array_type): input_array = array_type(self.test_data) with pytest.raises(Exception): - KrigingClass = KrigingModel(input_array, numerical_gradients=1) + KrigingModel(input_array, numerical_gradients=1) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__04(self, array_type): input_array = array_type(self.test_data) with pytest.raises(Exception): - KrigingClass = KrigingModel(input_array, regularization=1) + KrigingModel(input_array, regularization=1) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__05(self, array_type): input_array = array_type(self.test_data) with pytest.raises(Exception): - KrigingClass = KrigingModel(input_array, overwrite=1) + KrigingModel(input_array, overwrite=1) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__06(self, array_type): input_array = array_type(self.test_data) with pytest.raises(Exception): - KrigingClass = KrigingModel(input_array, fname="solution.pkl") + KrigingModel(input_array, fname="solution.pkl") @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__07(self, array_type): input_array = array_type(self.test_data) with pytest.raises(Exception): - KrigingClass = KrigingModel(input_array, fname=1) + KrigingModel(input_array, fname=1) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__08(self, array_type): + def test__init__08(self, array_type, tmp_path): input_array = array_type(self.test_data) - file_name = "test_filename.pickle" - KrigingClass1 = KrigingModel(input_array, fname=file_name, overwrite=True) - results = KrigingClass1.training() - 
KrigingClass2 = KrigingModel(input_array, fname=file_name, overwrite=True) - assert KrigingClass1.filename == KrigingClass2.filename + file_name = str(tmp_path / "test_filename.pickle") + kriging_class1 = KrigingModel(input_array, fname=file_name, overwrite=True) + kriging_class1.training() + kriging_class2 = KrigingModel(input_array, fname=file_name, overwrite=True) + assert kriging_class1.filename == kriging_class2.filename @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__09(self, array_type): + def test__init__09(self, array_type, tmp_path): input_array = array_type(self.test_data) - file_name1 = "test_filename1.pickle" - file_name2 = "test_filename2.pickle" - KrigingClass1 = KrigingModel(input_array, fname=file_name1, overwrite=True) - results = KrigingClass1.training() - KrigingClass2 = KrigingModel(input_array, fname=file_name2, overwrite=True) - assert KrigingClass1.filename == file_name1 - assert KrigingClass2.filename == file_name2 + file_name1 = str(tmp_path / "test_filename1.pickle") + file_name2 = str(tmp_path / "test_filename2.pickle") + kriging_class1 = KrigingModel(input_array, fname=file_name1, overwrite=True) + kriging_class1.training() + kriging_class2 = KrigingModel(input_array, fname=file_name2, overwrite=True) + assert kriging_class1.filename == file_name1 + assert kriging_class2.filename == file_name2 @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_covariance_matrix_generator(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array[0:3], regularization=True) + kriging_class = KrigingModel(input_array[0:3], regularization=True) p = 2 theta = np.array([1, 2]) reg_param = 1.00000000e-06 - cov_matrix = KrigingClass.covariance_matrix_generator( - KrigingClass.x_data_scaled, theta, reg_param, p + cov_matrix = kriging_class.covariance_matrix_generator( + kriging_class.x_data_scaled, theta, reg_param, p ) cov_matrix_exp = np.array( @@ -149,7 +149,7 @@ def test_covariance_matrix_generator(self, array_type): @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_covariance_inverse_generator_01(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array[0:3], regularization=True) + kriging_class = KrigingModel(input_array[0:3], regularization=True) cov_matrix = np.array( [ [1.000001, 0.60653066, 0.13533528], @@ -165,7 +165,7 @@ def test_covariance_inverse_generator_01(self, array_type): ] ) - inverse_x = KrigingClass.covariance_inverse_generator(cov_matrix) + inverse_x = kriging_class.covariance_inverse_generator(cov_matrix) np.testing.assert_array_equal( np.round(inverse_x, 7), np.round(cov_matrix_inv_exp, 7) ) @@ -174,16 +174,16 @@ def test_covariance_inverse_generator_01(self, array_type): @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_covariance_inverse_generator_02(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array[0:3], regularization=True) + kriging_class = KrigingModel(input_array[0:3], regularization=True) cov_matrix = np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]]) - inverse_x = KrigingClass.covariance_inverse_generator(cov_matrix) + inverse_x = kriging_class.covariance_inverse_generator(cov_matrix) np.testing.assert_array_equal(np.round(inverse_x, 7), np.round(cov_matrix, 7)) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_kriging_mean(self, 
array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array[0:3], regularization=True) + kriging_class = KrigingModel(input_array[0:3], regularization=True) cov_matrix_inv = np.array( [ [1.82957788, -1.51792604, 0.67306158], @@ -191,7 +191,7 @@ def test_kriging_mean(self, array_type): [0.67306158, -1.51792604, 1.82957788], ] ) - kriging_mean = KrigingClass.kriging_mean(cov_matrix_inv, KrigingClass.y_data) + kriging_mean = kriging_class.kriging_mean(cov_matrix_inv, kriging_class.y_data) kriging_mean_exp = 20.18496 assert np.round(kriging_mean_exp, 5) == np.round(kriging_mean[0][0], 5) @@ -199,9 +199,9 @@ def test_kriging_mean(self, array_type): @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_y_mu_calculation(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array[0:3], regularization=True) + kriging_class = KrigingModel(input_array[0:3], regularization=True) kriging_mean = 20.18496 - y_mu = KrigingClass.y_mu_calculation(KrigingClass.y_data, kriging_mean) + y_mu = kriging_class.y_mu_calculation(kriging_class.y_data, kriging_mean) y_mu_exp = np.array([[-18.18496], [-6.93496], [16.81504]]) np.testing.assert_array_equal(np.round(y_mu, 5), np.round(y_mu_exp, 5)) @@ -209,7 +209,7 @@ def test_y_mu_calculation(self, array_type): @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_kriging_sd(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array[0:3], regularization=True) + kriging_class = KrigingModel(input_array[0:3], regularization=True) cov_matrix_inv = np.array( [ [1.82957788, -1.51792604, 0.67306158], @@ -218,8 +218,8 @@ def test_kriging_sd(self, array_type): ] ) y_mu_exp = np.array([[-18.18496], [-6.93496], [16.81504]]) - sigma_sq = KrigingClass.kriging_sd( - cov_matrix_inv, y_mu_exp, KrigingClass.y_data.shape[0] + sigma_sq = kriging_class.kriging_sd( + cov_matrix_inv, y_mu_exp, kriging_class.y_data.shape[0] ) sigma_sq_exp = 272.84104637 assert np.round(sigma_sq_exp, 5) == np.round(sigma_sq[0][0], 5) @@ -228,10 +228,10 @@ def test_kriging_sd(self, array_type): # @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) # def test_print_fun(self, array_type): # input_array = array_type(self.training_data) - # KrigingClass = KrigingModel(input_array[0:3], regularization=True) + # kriging_class = KrigingModel(input_array[0:3], regularization=True) # capturedOutput = io.StringIO() # sys.stdout = capturedOutput - # KrigingClass.print_fun(1, 2, 3.7) + # kriging_class.print_fun(1, 2, 3.7) # sys.stdout = sys.__stdout__ # assert "at minimum 2.0000 accepted 3\n" == capturedOutput.getvalue() @@ -239,11 +239,11 @@ def test_kriging_sd(self, array_type): @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_objective_function(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array[0:3], regularization=True) + kriging_class = KrigingModel(input_array[0:3], regularization=True) p = 2 var_vector = np.array([1, 2, 1.00000000e-06]) - conc_log_like = KrigingClass.objective_function( - var_vector, KrigingClass.x_data_scaled, KrigingClass.y_data, p + conc_log_like = kriging_class.objective_function( + var_vector, kriging_class.x_data_scaled, kriging_class.y_data, p ) conc_log_like_exp = 8.0408619 @@ -253,11 +253,11 @@ def test_objective_function(self, array_type): @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def 
test_numerical_gradient_01(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array[0:3], regularization=True) + kriging_class = KrigingModel(input_array[0:3], regularization=True) p = 2 var_vector = np.array([1, 2, 1.00000000e-06]) - grad_vec = KrigingClass.numerical_gradient( - var_vector, KrigingClass.x_data_scaled, KrigingClass.y_data, p + grad_vec = kriging_class.numerical_gradient( + var_vector, kriging_class.x_data_scaled, kriging_class.y_data, p ) grad_vec_exp = np.array([0, 0, 8.8817842e-10]) np.testing.assert_array_equal(np.round(grad_vec, 5), np.round(grad_vec_exp, 5)) @@ -266,11 +266,11 @@ def test_numerical_gradient_01(self, array_type): @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_numerical_gradient_02(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array[0:3], regularization=False) + kriging_class = KrigingModel(input_array[0:3], regularization=False) p = 2 var_vector = np.array([1, 2, 1.00000000e-06]) - grad_vec = KrigingClass.numerical_gradient( - var_vector, KrigingClass.x_data_scaled, KrigingClass.y_data, p + grad_vec = kriging_class.numerical_gradient( + var_vector, kriging_class.x_data_scaled, kriging_class.y_data, p ) grad_vec_exp = np.array([0, 0, 0]) np.testing.assert_array_equal(np.round(grad_vec, 5), np.round(grad_vec_exp, 5)) @@ -279,28 +279,28 @@ def test_numerical_gradient_02(self, array_type): @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_parameter_optimization_01(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array[0:3]) + kriging_class = KrigingModel(input_array[0:3]) p = 2 np.random.seed(0) - opt_results = KrigingClass.parameter_optimization(p) + opt_results = kriging_class.parameter_optimization(p) assert len(opt_results.x) == 3 - assert opt_results.success == True + assert opt_results.success is True @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_parameter_optimization_02(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array[0:3], numerical_gradients=False) + kriging_class = KrigingModel(input_array[0:3], numerical_gradients=False) p = 2 - opt_results = KrigingClass.parameter_optimization(p) + opt_results = kriging_class.parameter_optimization(p) assert len(opt_results.x) == 3 - assert opt_results.minimization_failures == False + assert not opt_results.minimization_failures @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_optimal_parameter_evaluation(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array[0:3]) + kriging_class = KrigingModel(input_array[0:3]) p = 2 var_vector = np.array([1, 2, 1.00000000e-06]) ( @@ -311,7 +311,7 @@ def test_optimal_parameter_evaluation(self, array_type): cov_mat, cov_inv, y_mu, - ) = KrigingClass.optimal_parameter_evaluation(var_vector, p) + ) = kriging_class.optimal_parameter_evaluation(var_vector, p) np.testing.assert_array_equal(theta, [10**1, 10**2]) np.testing.assert_array_equal(reg_param, 1.00000000e-06) @@ -319,7 +319,7 @@ def test_optimal_parameter_evaluation(self, array_type): @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_error_calculation(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array[0:3], regularization=False) + kriging_class = 
KrigingModel(input_array[0:3], regularization=False) p = 2 var_vector = np.array([1, 2, 1.00000000e-06]) ( @@ -330,16 +330,16 @@ def test_error_calculation(self, array_type): cov_mat, cov_inv, y_mu, - ) = KrigingClass.optimal_parameter_evaluation(var_vector, p) - y_prediction_exp = np.zeros((KrigingClass.x_data_scaled.shape[0], 1)) - for i in range(0, KrigingClass.x_data_scaled.shape[0]): + ) = kriging_class.optimal_parameter_evaluation(var_vector, p) + y_prediction_exp = np.zeros((kriging_class.x_data_scaled.shape[0], 1)) + for i in range(0, kriging_class.x_data_scaled.shape[0]): cmt = ( np.matmul( ( ( np.abs( - KrigingClass.x_data_scaled[i, :] - - KrigingClass.x_data_scaled + kriging_class.x_data_scaled[i, :] + - kriging_class.x_data_scaled ) ) ** p @@ -352,26 +352,26 @@ def test_error_calculation(self, array_type): np.matmul(cov_matrix_tests.transpose(), cov_inv), y_mu ) - ss_error, rmse_error, y_prediction = KrigingClass.error_calculation( + ss_error, rmse_error, y_prediction = kriging_class.error_calculation( theta, p, mean, cov_inv, y_mu, - KrigingClass.x_data_scaled, - KrigingClass.y_data, + kriging_class.x_data_scaled, + kriging_class.y_data, ) np.testing.assert_array_equal(y_prediction, y_prediction_exp) assert ( - np.sum((KrigingClass.y_data - y_prediction_exp) ** 2) - / KrigingClass.x_data_scaled.shape[0] + np.sum((kriging_class.y_data - y_prediction_exp) ** 2) + / kriging_class.x_data_scaled.shape[0] == ss_error ) assert ( np.sqrt( - np.sum((KrigingClass.y_data - y_prediction_exp) ** 2) - / KrigingClass.x_data_scaled.shape[0] + np.sum((kriging_class.y_data - y_prediction_exp) ** 2) + / kriging_class.x_data_scaled.shape[0] ) == rmse_error ) @@ -380,7 +380,7 @@ def test_error_calculation(self, array_type): @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_r2_calculation(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array[0:3], regularization=False) + kriging_class = KrigingModel(input_array[0:3], regularization=False) p = 2 var_vector = np.array([1, 2, 1.00000000e-06]) ( @@ -391,18 +391,18 @@ def test_r2_calculation(self, array_type): cov_mat, cov_inv, y_mu, - ) = KrigingClass.optimal_parameter_evaluation(var_vector, p) + ) = kriging_class.optimal_parameter_evaluation(var_vector, p) - ss_error, rmse_error, y_prediction = KrigingClass.error_calculation( + ss_error, rmse_error, y_prediction = kriging_class.error_calculation( theta, p, mean, cov_inv, y_mu, - KrigingClass.x_data_scaled, - KrigingClass.y_data, + kriging_class.x_data_scaled, + kriging_class.y_data, ) - r_square = KrigingClass.r2_calculation(KrigingClass.y_data, y_prediction) + r_square = kriging_class.r2_calculation(kriging_class.y_data, y_prediction) assert 0.999999999999 == r_square @pytest.mark.unit @@ -410,29 +410,29 @@ def test_r2_calculation(self, array_type): def test_predict_output_01(self, array_type): input_array = array_type(self.training_data) np.random.seed(0) - KrigingClass = KrigingModel(input_array) - results = KrigingClass.training() - y_pred = KrigingClass.predict_output(KrigingClass.x_data_scaled) - assert y_pred.shape[0] == KrigingClass.x_data_scaled.shape[0] + kriging_class = KrigingModel(input_array) + kriging_class.training() + y_pred = kriging_class.predict_output(kriging_class.x_data_scaled) + assert y_pred.shape[0] == kriging_class.x_data_scaled.shape[0] @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_predict_output(self, array_type): input_array = 
array_type(self.training_data) np.random.seed(0) - KrigingClass = KrigingModel(input_array) - results = KrigingClass.training() - y_pred = KrigingClass.predict_output(np.array([0.1, 0.2])) + kriging_class = KrigingModel(input_array) + kriging_class.training() + y_pred = kriging_class.predict_output(np.array([0.1, 0.2])) assert y_pred.shape[0] == 1 @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_training(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array[0:3], regularization=False) + kriging_class = KrigingModel(input_array[0:3], regularization=False) np.random.seed(0) p = 2 - bh_results = KrigingClass.parameter_optimization(p) + bh_results = kriging_class.parameter_optimization(p) # Calculate other variables and parameters ( optimal_theta, @@ -442,27 +442,27 @@ def test_training(self, array_type): optimal_cov_mat, opt_cov_inv, optimal_ymu, - ) = KrigingClass.optimal_parameter_evaluation(bh_results.x, p) + ) = kriging_class.optimal_parameter_evaluation(bh_results.x, p) # Training performance ( training_ss_error, rmse_error, y_training_predictions, - ) = KrigingClass.error_calculation( + ) = kriging_class.error_calculation( optimal_theta, p, optimal_mean, opt_cov_inv, optimal_ymu, - KrigingClass.x_data_scaled, - KrigingClass.y_data, + kriging_class.x_data_scaled, + kriging_class.y_data, ) - r2_training = KrigingClass.r2_calculation( - KrigingClass.y_data, y_training_predictions + r2_training = kriging_class.r2_calculation( + kriging_class.y_data, y_training_predictions ) np.random.seed(0) - results = KrigingClass.training() + results = kriging_class.training() np.testing.assert_array_equal(results.optimal_weights, optimal_theta) np.testing.assert_array_equal( results.regularization_parameter, optimal_reg_param @@ -479,17 +479,19 @@ def test_training(self, array_type): np.testing.assert_array_equal(results.training_R2, r2_training) np.testing.assert_array_equal(results.training_rmse, rmse_error) np.testing.assert_array_equal(results.optimal_p, p) - np.testing.assert_array_equal(results.x_data, KrigingClass.x_data) - np.testing.assert_array_equal(results.x_data_scaled, KrigingClass.x_data_scaled) - np.testing.assert_array_equal(results.x_data_min, KrigingClass.x_data_min) - np.testing.assert_array_equal(results.x_data_max, KrigingClass.x_data_max) + np.testing.assert_array_equal(results.x_data, kriging_class.x_data) + np.testing.assert_array_equal( + results.x_data_scaled, kriging_class.x_data_scaled + ) + np.testing.assert_array_equal(results.x_data_min, kriging_class.x_data_min) + np.testing.assert_array_equal(results.x_data_max, kriging_class.x_data_max) @pytest.mark.unit @pytest.mark.parametrize("array_type", [pd.DataFrame]) def test_get_feature_vector_01(self, array_type): input_array = array_type(self.full_data) - KrigingClass = KrigingModel(input_array, regularization=False) - p = KrigingClass.get_feature_vector() + kriging_class = KrigingModel(input_array, regularization=False) + p = kriging_class.get_feature_vector() expected_dict = {"x1": 0, "x2": 0} assert expected_dict == p.extract_values() @@ -497,8 +499,8 @@ def test_get_feature_vector_01(self, array_type): @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_get_feature_vector_02(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array, regularization=False) - p = KrigingClass.get_feature_vector() + kriging_class = KrigingModel(input_array, regularization=False) 
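+        # self.training_data has no column headers, so the feature-vector keys
+        # are the default positional indices (contrast test_get_feature_vector_01,
+        # where the DataFrame columns provide the keys "x1" and "x2").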
+ p = kriging_class.get_feature_vector() expected_dict = {0: 0, 1: 0} assert expected_dict == p.extract_values() @@ -506,38 +508,38 @@ def test_get_feature_vector_02(self, array_type): @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_kriging_generate_expression(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array, regularization=False) - results = KrigingClass.training() - p = KrigingClass.get_feature_vector() + kriging_class = KrigingModel(input_array, regularization=False) + results = kriging_class.training() + p = kriging_class.get_feature_vector() lv = [] for i in p.keys(): lv.append(p[i]) - rbf_expr = results.generate_expression((lv)) + results.generate_expression(lv) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_pickle_load01(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array, regularization=False) - results = KrigingClass.training() - KrigingClass.pickle_load(KrigingClass.filename) + kriging_class = KrigingModel(input_array, regularization=False) + kriging_class.training() + kriging_class.pickle_load(kriging_class.filename) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_pickle_load02(self, array_type): input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array, regularization=False) + kriging_class = KrigingModel(input_array, regularization=False) with pytest.raises(Exception): - KrigingClass.pickle_load("file_not_existing.pickle") + kriging_class.pickle_load("file_not_existing.pickle") @pytest.mark.unit @patch("matplotlib.pyplot.show") @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test_parity_residual_plots(self, mock_show, array_type): + def test_parity_residual_plots(self, mock_show, array_type): # noqa input_array = array_type(self.training_data) - KrigingClass = KrigingModel(input_array, regularization=False) - results = KrigingClass.training() - KrigingClass.parity_residual_plots() + kriging_class = KrigingModel(input_array, regularization=False) + kriging_class.training() + kriging_class.parity_residual_plots() if __name__ == "__main__": From 3bf0947911ca4e3d81584552a68a8c31a1ecb9e0 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Mon, 10 Jun 2024 16:59:08 -0700 Subject: [PATCH 06/11] removed lint disablings --- .../surrogate/pysmo/polynomial_regression.py | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/idaes/core/surrogate/pysmo/polynomial_regression.py b/idaes/core/surrogate/pysmo/polynomial_regression.py index fd2e219a1f..303d311a19 100644 --- a/idaes/core/surrogate/pysmo/polynomial_regression.py +++ b/idaes/core/surrogate/pysmo/polynomial_regression.py @@ -10,9 +10,6 @@ # All rights reserved. Please see the files COPYRIGHT.md and LICENSE.md # for full copyright and license information. ################################################################################# -# TODO: Missing doc strings -# pylint: disable=missing-module-docstring -# pylint: disable=missing-function-docstring """ The purpose of this file is to perform polynomial regression in Pyomo. This will be done in two stages. 
First, a sampling plan will @@ -1021,8 +1018,7 @@ def results_generation(self, beta, order): print("\n------------------------------------------------------------") print("The final coefficients of the regression terms are: \n") print("k |", beta[0, 0]) - results_df = pd.concat([#results_df, - pd.Series({"k": beta[0, 0]})], axis=0) + results_df = pd.concat([pd.Series({"k": beta[0, 0]})], axis=0) # results_df, if self.multinomials == 1: for i in range(1, order + 1): for j in range(1, self.number_of_x_vars + 1): @@ -1740,11 +1736,12 @@ def pickle_save(self, solutions): @staticmethod def pickle_load(solution_file): - """ - pickle_load loads the results of a saved run 'file.obj'. It returns an array of two elements: the setup (index[0]) and the results (index[1]). + """Load the results of a saved run 'file.obj' - Input arguments: - solution_file : Pickle object file containing previous solution to be loaded. + Args: + solution_file: Pickle object file containing previous solution to be loaded. + returns: + An array of two elements, the setup (index[0]) and the results (index[1]). """ try: @@ -1821,7 +1818,8 @@ def _report(self): ) return s - def print_report(self): + def print_report(self) -> None: + """Print report to standard output""" s = self._report() print(s) @@ -1830,13 +1828,14 @@ def _repr_pretty_(self, p, cycle=False): s = self._report() p.text(s) - def confint_regression(self, confidence=0.95): - """ - The ``confint_regression`` method prints the confidence intervals for the regression patamaters. + def confint_regression(self, confidence: float = 0.95) -> pd.DataFrame: + """Print the confidence intervals for the regression parameters. Args: - confidence : Required confidence interval level, default = 0.95 (95%) + confidence: Required confidence interval level, default = 0.95 (95%) + Returns: + Data frame with values that were printed. 
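
A usage sketch for the reworked method documented above (this assumes pr is an already-trained PolynomialRegression instance; the 0.99 level is arbitrary):

    ci = pr.confint_regression(confidence=0.99)  # prints the intervals...
    print(ci)  # ...and, per the new docstring, also returns them as a DataFrame
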
""" data = self.final_training_data From ca378bfd705b767c525aceb53c1aca230c8a194b Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Tue, 11 Jun 2024 07:39:39 -0700 Subject: [PATCH 07/11] more changes --- idaes/core/surrogate/pysmo/__init__.py | 6 + .../surrogate/pysmo/polynomial_regression.py | 363 +++++++----------- .../surrogate/pysmo/radial_basis_function.py | 36 +- idaes/core/surrogate/pysmo/tests/__init__.py | 8 + .../pysmo/tests/test_polynomial_regression.py | 52 ++- .../pysmo/tests/test_radial_basis_function.py | 18 +- idaes/core/surrogate/pysmo/utils.py | 17 + pytest.ini | 7 +- 8 files changed, 226 insertions(+), 281 deletions(-) diff --git a/idaes/core/surrogate/pysmo/__init__.py b/idaes/core/surrogate/pysmo/__init__.py index e69de29bb2..e00bfa855f 100644 --- a/idaes/core/surrogate/pysmo/__init__.py +++ b/idaes/core/surrogate/pysmo/__init__.py @@ -0,0 +1,6 @@ +import logging + +# Make sure logs are propagated up to root +_log = logging.getLogger(__name__) +_log.propagate = True + diff --git a/idaes/core/surrogate/pysmo/polynomial_regression.py b/idaes/core/surrogate/pysmo/polynomial_regression.py index 303d311a19..c7dc0b3540 100644 --- a/idaes/core/surrogate/pysmo/polynomial_regression.py +++ b/idaes/core/surrogate/pysmo/polynomial_regression.py @@ -33,6 +33,7 @@ import os.path import pickle +from typing import Union # Imports from third parties from matplotlib import pyplot as plt @@ -57,7 +58,7 @@ from pyomo.core.expr.visitor import replace_expressions # Imports from IDAES namespace -from idaes.core.surrogate.pysmo.utils import NumpyEvaluator +from idaes.core.surrogate.pysmo.utils import NumpyEvaluator, date_versioned_filename from idaes.logger import getIdaesLogger __author__ = "Oluwamayowa Amusat" @@ -188,22 +189,30 @@ class PolynomialRegression: """ - MAX_POLY = 10 # maximum polynomial order + #: maximum of maximum polynomial order + MAX_MAXPOLY = 10 + + #: known solution methods for argument 'solution_method' + SOLUTION_METHODS = { + "mle": "maximum likelihood", + "bfgs": "BFGS", + "pyomo": "Pyomo optimization", + } def __init__( self, - original_data_input, - regression_data_input, - maximum_polynomial_order, - number_of_crossvalidations=None, - no_adaptive_samples=None, - training_split=None, - max_fraction_training_samples=None, - max_iter=None, - solution_method=None, - multinomials=None, - fname=None, - overwrite=False, + original_data_input: Union[pd.DataFrame, np.ndarray], + regression_data_input: Union[pd.DataFrame, np.ndarray], + maximum_polynomial_order: int, + number_of_crossvalidations: int = 3, + no_adaptive_samples: int = 4, + training_split: float = 0.75, + max_fraction_training_samples: float = 0.5, + max_iter: int = None, + solution_method: str = "pyomo", + multinomials: int = 1, + fname: str = "solution.pickle", + overwrite: bool = False, ): """ Initialization of PolynomialRegression class. @@ -226,88 +235,53 @@ def __init__( multinomials(bool): This option determines whether or not multinomial terms are considered during polynomial fitting. Takes 0 for No and 1 for Yes. Default = 1. - Returns: - **self** object containing all the input information. 
- Raises: - ValueError: - - The input datasets (**original_data_input** or **regression_data_input**) are of the wrong type (not Numpy arrays or Pandas Dataframes) - - Exception: - * **maximum_polynomial_order** is not a positive, non-zero integer or **maximum_polynomial_order** is higher than the number of training samples available - Exception: - * **solution_method** is not 'mle', 'pyomo' or 'bfgs - Exception: - - **multinomials** is not binary (0 or 1) - Exception: - - **training_split** is not between 0 and 1 - Exception: - - **number_of_crossvalidations** is not a positive, non-zero integer - Exception: - - **max_fraction_training_samples** is not between 0 and 1 - Exception: - - **no_adaptive_samples** is not a positive, non-zero integer - Exception: - - **max_iter** is not a positive, non-zero integer - - warnings.warn: - - When the number of cross-validations is too high, i.e. number_of_crossvalidations > 10 + TypeError: if inputs are of the wrong type + ValueError: if any of the below is true + - The input datasets (original_data_input or regression_data_input) + are of the wrong type (not Numpy arrays or Pandas Dataframes) + - `maximum_polynomial_order` is not a positive, non-zero integer or + is more than the number of available training samples + - `solution_method` is not one of the keys in `SOLUTION_METHODS` + - `multinomials` is not binary (0 or 1) + - `training_split` is not between 0 and 1 + - `number_of_crossvalidations` is not positive + - `max_fraction_training_samples` is not between 0 and 1 + - `no_adaptive_samples` is not positive + - `max_iter` is not positive """ + _log.info("PolynomialRegression constructor:begin") - # print( - # "\n===========================Polynomial Regression===============================================\n" - # ) - _log.info("Polynomial Regression (begin)") - # Checks if fname is provided or exists in the path if not isinstance(overwrite, bool): - # PYLINT-TODO - # pylint: disable-next=broad-exception-raised - raise Exception("overwrite must be boolean.") + raise ValueError("overwrite must be boolean") self.overwrite = overwrite - if fname is None: - fname = "solution.pickle" - self.filename = "solution.pickle" - elif ( - not isinstance(fname, str) - or os.path.splitext(fname)[-1].lower() != ".pickle" - ): - # PYLINT-TODO - # pylint: disable-next=broad-exception-raised - raise Exception( - 'fname must be a string with extension ".pickle". Please correct.' 
- ) + + if not isinstance(fname, str): + raise ValueError("'fname' argument must be a str") + if not fname.endswith(".pickle"): + raise ValueError("'fname' argument must have extension '.pickle'") if ( os.path.exists(fname) and overwrite is True ): # Explicit overwrite done by user - _log.warning( - f"Warning: '{fname}' exists, previous file will be overwritten" - ) + _log.warning(f"'{fname}' exists, previous file will be overwritten") self.filename = fname elif os.path.exists(fname) and overwrite is False: # User is not overwriting - self.filename = ( - os.path.splitext(fname)[0] - + "_v_" - + pd.Timestamp.today().strftime("%m-%d-%y_%H%M%S") - + ".pickle" - ) - _log.warning( - f"Warning: '{fname}' exists, " - f"results will be saved to {self.filename}" - ) - # self.filename = 'solution.pickle' + self.filename = date_versioned_filename(fname) + _log.warning(f"'{fname}' exists, saving to '{self.filename}'") elif os.path.exists(fname) is False: self.filename = fname if isinstance(original_data_input, pd.DataFrame): original_data = original_data_input.values - # FIXME: if we add an option to specify the response column, this needs to change + # FIXME: if we add an option to specify the response column, + # this needs to change self.regression_data_columns = list(original_data_input.columns)[:-1] elif isinstance(original_data_input, np.ndarray): original_data = original_data_input self.regression_data_columns = list(range(original_data_input.shape[1] - 1)) else: - raise ValueError( - "original_data_input: Pandas dataframe or numpy array required." + raise TypeError( + "'original_data_input' must be Pandas dataframe or numpy ndarray" ) if isinstance(regression_data_input, pd.DataFrame): @@ -315,166 +289,114 @@ def __init__( elif isinstance(regression_data_input, np.ndarray): regression_data = regression_data_input else: - raise ValueError( - "regression_data_input: Pandas dataframe or numpy array required." + raise TypeError( + "'regression_data_input' must be Pandas dataframe or numpy ndarray" ) # Check for potential dimensionality problems in input data if regression_data.shape[0] > original_data.shape[0]: - # PYLINT-TODO - # pylint: disable-next=broad-exception-raised - raise Exception( - "The sampled data has more entries than the original dataset." + raise ValueError( + "Sampled data in 'regression_data_input' has more entries " + "than original data in 'original_data_input'" ) - elif regression_data.shape[1] != original_data.shape[1]: - # PYLINT-TODO - # pylint: disable-next=broad-exception-raised - raise Exception( - "Dimensional discrepancies in the dimensions of the original and regression datasets." + if regression_data.shape[1] != original_data.shape[1]: + raise ValueError( + "Sampled data in 'regression_data_input' has different " + "dimensions than original data in 'original_data_input'" ) - elif (regression_data.shape[1] == 1) or (original_data.shape[1] == 1): - # PYLINT-TODO - # pylint: disable-next=broad-exception-raised - raise Exception( - "Input data requires at least two dimensions (X and Y data)." + if (regression_data.shape[1] == 1) or (original_data.shape[1] == 1): + raise ValueError( + "Input data requires at least two dimensions (X and Y data)" ) self.original_data = original_data self.regression_data = regression_data - num_cross = number_of_crossvalidations - if num_cross is None: - num_cross = 3 - _log.info(f"Use default number of cross-validations: {num_cross}") - elif num_cross > 10: - msg = ( - f"The number of cross-validations ({num_cross}) is large. 
" - f"The simulation may take a while to run" + ncross, aname = number_of_crossvalidations, "number_of_crossvalidations" + if not isinstance(ncross, int): + self._bad_arg(ncross, aname, "must be an integer", type_error=True) + if ncross <= 0: + self._bad_arg(ncross, aname, "must be > 0") + if ncross > 10: + _log.warning( + f"Number of cross-validations ({ncross}) " + f"is large, the simulation may take a while to run" ) - # warnings.warn(msg) - _log.warning(msg) - self.number_of_crossvalidations = num_cross + self.number_of_crossvalidations = number_of_crossvalidations - max_poly = maximum_polynomial_order + max_poly, aname = maximum_polynomial_order, "maximum_polynomial_order" if not isinstance(max_poly, int): - # PYLINT-TODO - # pylint: disable-next=broad-exception-raised - raise Exception("Maximum polynomial order must be an integer") - elif max_poly > self.MAX_POLY: - msg = ( + self._bad_arg(max_poly, aname, "must be an integer", type_error=True) + if max_poly <= 0: + self._bad_arg(max_poly, aname, "must be > 0") + if max_poly > self.MAX_MAXPOLY: + _log.warning( f"Maximum polynomial order value ({max_poly})" - f"reduced maximum allowed ({self.MAX_POLY})" + f"reduced to maximum allowed ({self.MAX_MAXPOLY})" + ) + max_poly = self.MAX_MAXPOLY + if max_poly >= regression_data.shape[0]: + self._bad_arg( + max_poly, aname, "too high for the number of samples supplied" ) - # warnings.warn(msg) - _log.warning(msg) - max_poly = 10 self.max_polynomial_order = max_poly self.number_of_x_vars = regression_data.shape[1] - 1 - if training_split is None: - _log.warning("The default training/cross-validation split of 0.75 is used.") - training_split = 0.75 - elif training_split >= 1 or training_split <= 0: - # PYLINT-TODO - # pylint: disable-next=broad-exception-raised - raise Exception( - "Fraction of samples used for training must be between 0 and 1" - ) + if not (0 < training_split < 1): + self._bad_arg(training_split, "training_split", "must be between 0 and 1") self.fraction_training = training_split - if no_adaptive_samples is None: - no_adaptive_samples = 4 - self.no_adaptive_samples = no_adaptive_samples + samp, aname = no_adaptive_samples, "no_adaptive_samples" + if not isinstance(samp, int): + self._bad_arg(samp, aname, "must be an integer", type_error=True) + if samp < 0: + self._bad_arg(samp, aname, "must be positive") + self.no_adaptive_samples = samp self.number_of_samples = regression_data.shape[0] - if max_fraction_training_samples is None: - max_fraction_training_samples = 0.5 - elif max_fraction_training_samples > 1 or max_fraction_training_samples < 0: - # PYLINT-TODO - # pylint: disable-next=broad-exception-raised - raise Exception( - "The fraction for the maximum number of training samples must be between 0 and 1" + if not (0 <= max_fraction_training_samples <= 1): + self._bad_arg( + max_fraction_training_samples, + "max_fraction_training_samples", + "must be between 0 and 1", ) self.max_fraction_training_samples = max_fraction_training_samples + aname = "max_iter" + if (regression_data.shape[0] < original_data.shape[0]) and max_iter is None: max_iter = 10 if ( regression_data.shape[0] == original_data.shape[0] or no_adaptive_samples == 0 ): - _log.warning("No iterations will be run.") + _log.warning("No iterations will be run") max_iter = 0 + if not isinstance(max_iter, int): + self._bad_arg(max_iter, aname, "must be an integer", type_error=True) + if max_iter < 0: + self._bad_arg(max_iter, aname, "must be positive") self.max_iter = max_iter - # Ensure all other key variables are 
integers
-        if not isinstance(self.number_of_crossvalidations, int):
-            # PYLINT-TODO
-            # pylint: disable-next=broad-exception-raised
-            raise Exception("Number of cross-validations must be an integer")
-        elif not isinstance(self.no_adaptive_samples, int):
-            # PYLINT-TODO
-            # pylint: disable-next=broad-exception-raised
-            raise Exception("Number of adaptive samples must be an integer")
-        elif not isinstance(self.max_iter, int):
-            # PYLINT-TODO
-            # pylint: disable-next=broad-exception-raised
-            raise Exception("Maximum number of iterations must be an integer")
-        elif self.max_polynomial_order >= regression_data.shape[0]:
-            # PYLINT-TODO
-            # pylint: disable-next=broad-exception-raised
-            raise Exception(
-                "max_polynomial_order too high for the number of samples supplied"
-            )
-
-        if (self.max_polynomial_order <= 0) or (self.number_of_crossvalidations <= 0):
-            # PYLINT-TODO
-            # pylint: disable-next=broad-exception-raised
-            raise Exception(
-                "maximum_polynomial_order and number_of_crossvalidations must be positive, non-zero integers"
-            )
-        elif (self.no_adaptive_samples < 0) or (self.max_iter < 0):
-            # PYLINT-TODO
-            # pylint: disable-next=broad-exception-raised
-            raise Exception("no_adaptive_samples and max_iter must be positive")
-
-        if solution_method is None:
-            solution_method = "pyomo"
-            self.solution_method = solution_method
-            _log.warning("Using default parameter estimation method")
-        elif not isinstance(solution_method, str):
-            # PYLINT-TODO
-            # pylint: disable-next=broad-exception-raised
-            raise Exception("Invalid solution method. Must be of type <str>.")
-        elif (
-            (solution_method.lower() == "mle")
-            or (solution_method.lower() == "pyomo")
-            or (solution_method.lower() == "bfgs")
-        ):
-            solution_method = solution_method.lower()
-            self.solution_method = solution_method
-        else:
-            # PYLINT-TODO
-            # pylint: disable-next=broad-exception-raised
-            raise Exception(
-                'Invalid parameter estimation method entered. Select one of maximum likelihood (solution_method="mle"), Pyomo optimization (solution_method="pyomo") or BFGS (solution_method="bfgs") methods. '
+        meth, aname = solution_method, "solution_method"
+        if not isinstance(meth, str):
+            self._bad_arg(meth, aname, "must be a string", type_error=True)
+        if meth.lower() not in self.SOLUTION_METHODS:
+            method_list = ", ".join(
+                [f"'{k}'={v}" for k, v in self.SOLUTION_METHODS.items()]
             )
+            self._bad_arg(meth, aname, f"not in known methods: {method_list}")
+        self.solution_method = meth.lower()
         _log.info(f"Parameter estimation method: {self.solution_method}")

-        if multinomials is None:
-            self.multinomials = 1
-        elif multinomials == 1:
-            self.multinomials = 1
-        elif multinomials == 0:
-            self.multinomials = 0
-        else:
-            # PYLINT-TODO
-            # pylint: disable-next=broad-exception-raised
-            raise Exception(
-                'Multinomial must be binary: input "1" for "Yes" and "0" for "No". '
-            )
+        aname = "multinomials"
+        if not isinstance(multinomials, int):
+            self._bad_arg(multinomials, aname, "must be an integer", type_error=True)
+        if multinomials not in (0, 1):
+            self._bad_arg(multinomials, aname, "must be 0 (no) or 1 (yes)")
+        self.multinomials = multinomials

         self.feature_list = []
         self.additional_term_expressions = []
@@ -492,6 +414,23 @@ def __init__(
         self.extra_terms_feature_vector = None
         self.fit_status = None

+        _log.info("PolynomialRegression constructor:end status=OK")
+
+    @staticmethod
+    def _bad_arg(arg, name: str, why: str, type_error: bool = False, show: bool = True):
+        """Utility function to normalize raising of type and value errors
+        encountered during argument validation.
+        """
+        _log.warning(
+            f"PolynomialRegression constructor:end "
+            f"status={'Type' if type_error else 'Value'}Error arg={name}"
+        )
+        s = f"argument '{name}' ({arg}) {why}" if show else f"argument '{name}' {why}"
+        if type_error:
+            raise TypeError(s)
+        raise ValueError(s)
+
+
     @staticmethod
     def _format_model_perf(order, mae_error, mse_error, r_square, r_square_adj=None):
         s = (
@@ -534,13 +473,9 @@ def training_test_data_creation(self, additional_features=None):
         cross_val_data = {}
         num_training = int(np.around(self.number_of_samples * self.fraction_training))
         if num_training == 0:
-            # PYLINT-TODO
-            # pylint: disable-next=broad-exception-raised
-            raise Exception("The inputted of fraction_training is too low.")
+            raise ValueError("The input value of fraction_training is too low.")
         elif num_training == self.number_of_samples:
-            # PYLINT-TODO
-            # pylint: disable-next=broad-exception-raised
-            raise Exception("The inputted of fraction_training is too high.")
+            raise ValueError("The input value of fraction_training is too high.")
         for i in range(1, self.number_of_crossvalidations + 1):
             np.random.seed(i)
             if additional_features is None:
@@ -1153,10 +1088,11 @@ def user_defined_terms(self, additional_regression_features):
                         i
                     ].values
                 else:
-                    # PYLINT-TODO
-                    # pylint: disable-next=broad-exception-raised
-                    raise Exception(
-                        "Wrong data dimensions or type - additional_regression_features contain 1-D vectors, have same number of entries as regression_data and be of type pd.Series, pd.Dataframe or np.ndarray."
+                    raise ValueError(
+                        "Argument 'additional_regression_features' "
+                        "must contain 1-D vectors, have same number of entries as "
+                        "regression_data and be of type "
+                        "pd.Series, pd.Dataframe or np.ndarray."
                     )

         return additional_features_array
@@ -1305,11 +1241,11 @@ def polynomial_regression_fitting(self, additional_regression_features=None):
                 )
             ]
             adaptive_samples = sorted_comparison_vector_unique[
-                # pylint considers self.no_adaptive_samples to be None here
-                # pylint: disable=invalid-unary-operand-type
+                # pylint considers self.no_adaptive_samples to be None here ????
+                # should-pylint: disable=invalid-unary-operand-type?
                 -self.no_adaptive_samples :,
                 :,
-                # pylint: enable=invalid-unary-operand-type
+                # should-pylint: enable=invalid-unary-operand-type
             ]
            self.regression_data = np.concatenate(
                 (self.regression_data, adaptive_samples), axis=0
@@ -1744,13 +1680,8 @@ def pickle_load(solution_file):
             An array of two elements, the setup (index[0]) and the results (index[1]).
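
For illustration, the save/load round trip that pickle_load completes looks like this (the filename and data names are placeholders, and this assumes the standard training() entry point, which writes the pickle to self.filename as a side effect; the pickle-load tests in this series rely on that behavior):

    pr = PolynomialRegression(original_data, sampled_data,
                              maximum_polynomial_order=3,
                              fname="poly_fit.pickle", overwrite=True)
    results = pr.training()
    setup, fitted = PolynomialRegression.pickle_load(pr.filename)
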
""" - try: - filehandler = open(solution_file, "rb") - return pickle.load(filehandler) - except: - # PYLINT-TODO - # pylint: disable-next=broad-exception-raised - raise Exception("File could not be loaded.") + filehandler = open(solution_file, "rb") + return pickle.load(filehandler) def parity_residual_plots(self): """ diff --git a/idaes/core/surrogate/pysmo/radial_basis_function.py b/idaes/core/surrogate/pysmo/radial_basis_function.py index 6d48a6a0af..9be5dc5249 100644 --- a/idaes/core/surrogate/pysmo/radial_basis_function.py +++ b/idaes/core/surrogate/pysmo/radial_basis_function.py @@ -10,15 +10,10 @@ # All rights reserved. Please see the files COPYRIGHT.md and LICENSE.md # for full copyright and license information. ################################################################################# -# TODO: Missing doc strings -# pylint: disable=missing-module-docstring -# pylint: disable=missing-function-docstring - # pylint: disable=consider-using-enumerate # Imports from the python standard library import os.path -import warnings import pickle # Imports from third parties @@ -43,9 +38,15 @@ # Imports from IDAES namespace from idaes.core.surrogate.pysmo.sampling import FeatureScaling as fs +from idaes.core.surrogate.pysmo.utils import date_versioned_filename +from idaes.logger import getIdaesLogger __author__ = "Oluwamayowa Amusat" +# Logging +_log = getIdaesLogger(__name__, tag="surrogate") + + """ The purpose of this file is to perform radial basis functions in Pyomo. """ @@ -271,26 +272,13 @@ def __init__( if ( os.path.exists(fname) and overwrite is True ): # Explicit overwrite done by user - print( - "Warning:", - fname, - "already exists; previous file will be overwritten.\n", - ) + _log.warning(f"file '{fname}' exists; " + f"previous file will be overwritten") self.filename = fname elif os.path.exists(fname) and overwrite is False: # User is not overwriting - self.filename = ( - os.path.splitext(fname)[0] - + "_v_" - + pd.Timestamp.today().strftime("%m-%d-%y_%H%M%S") - + ".pickle" - ) - print( - "Warning:", - fname, - 'already exists; results will be saved to "', - self.filename, - '".\n', - ) + self.filename = date_versioned_filename(fname) + _log.warning(f"'{fname}' exists, results will be saved " + f"to '{self.filename}'") # self.filename = 'solution.pickle' elif os.path.exists(fname) is False: self.filename = fname @@ -1105,7 +1093,7 @@ def training(self): if x_condition_number < (1 / np.finfo(float).eps): self.solution_status = "ok" else: - warnings.warn( + _log.warning( "The parameter matrix A in A.x=B is ill-conditioned (condition number > 1e10). The solution returned may be inaccurate or unstable - inspect rmse error. 
Regularization (if not already done) may improve solution" ) self.solution_status = "unstable solution" diff --git a/idaes/core/surrogate/pysmo/tests/__init__.py b/idaes/core/surrogate/pysmo/tests/__init__.py index e69de29bb2..01653a849d 100644 --- a/idaes/core/surrogate/pysmo/tests/__init__.py +++ b/idaes/core/surrogate/pysmo/tests/__init__.py @@ -0,0 +1,8 @@ +def logs_got_warning(records, text: str = "") -> bool: + got_warning = False + for record in records: + if record.levelname == "WARNING" and text in record.msg: + got_warning = True + break + return got_warning + diff --git a/idaes/core/surrogate/pysmo/tests/test_polynomial_regression.py b/idaes/core/surrogate/pysmo/tests/test_polynomial_regression.py index 715332399d..f61e6dacfe 100644 --- a/idaes/core/surrogate/pysmo/tests/test_polynomial_regression.py +++ b/idaes/core/surrogate/pysmo/tests/test_polynomial_regression.py @@ -18,13 +18,14 @@ PolynomialRegression, FeatureScaling, ) +from idaes.core.surrogate.pysmo.tests import logs_got_warning import numpy as np import pandas as pd import pytest -from idaes import logger as idaes_logger +import logging -# Turn down the logging during the test -polynomial_regression.set_log_level(idaes_logger.ERROR) +# you're killing me, Smalls +logging.getLogger("idaes").handlers = [] class TestFeatureScaling: @@ -242,7 +243,7 @@ def test__init__02(self, array_type1, array_type2): def test__init__03(self, array_type1, array_type2): original_data_input = array_type1(self.test_data) regression_data_input = array_type2(self.sample_points) - with pytest.raises(ValueError): + with pytest.raises((TypeError, ValueError)): PolyClass = PolynomialRegression( original_data_input, regression_data_input, maximum_polynomial_order=5 ) @@ -253,8 +254,8 @@ def test__init__03(self, array_type1, array_type2): def test__init__04(self, array_type1, array_type2): original_data_input = array_type1(self.test_data) regression_data_input = array_type2(self.sample_points) - with pytest.raises(ValueError): - PolyClass = PolynomialRegression( + with pytest.raises((TypeError, ValueError)): + PolynomialRegression( original_data_input, regression_data_input, maximum_polynomial_order=5 ) @@ -306,27 +307,19 @@ def test__init__08(self, array_type1, array_type2): @pytest.mark.parametrize("array_type1", [np.array, pd.DataFrame]) @pytest.mark.parametrize("array_type2", [np.array, pd.DataFrame]) def test__init__09(self, array_type1, array_type2, caplog): + polynomial_regression.set_log_level(logging.WARNING) original_data_input = array_type1(self.test_data) regression_data_input = array_type2(self.sample_points) - #with pytest.warns(Warning): - with caplog.at_level(idaes_logger.WARNING): - PolyClass = PolynomialRegression( - original_data_input, - regression_data_input, - maximum_polynomial_order=5, - number_of_crossvalidations=11, - ) - assert ( + PolyClass = PolynomialRegression( + original_data_input, + regression_data_input, + maximum_polynomial_order=5, + number_of_crossvalidations=11, + ) + assert ( PolyClass.number_of_crossvalidations == 11 ) # Default number of cross-validations - got_warning = False - print("@@ printing records") - for record in caplog.records: - print(f"@@ record={record}") - if record.levelname == "WARNING" and "cross-validations" in record.message: - got_warning = True - break - assert got_warning + assert logs_got_warning(caplog.records, "cross-validations") @pytest.mark.unit @pytest.mark.parametrize("array_type1", [np.array, pd.DataFrame]) @@ -342,14 +335,14 @@ def test__init__10(self, array_type1, 
array_type2): @pytest.mark.unit @pytest.mark.parametrize("array_type1", [np.array, pd.DataFrame]) @pytest.mark.parametrize("array_type2", [np.array, pd.DataFrame]) - def test__init__11(self, array_type1, array_type2): + def test__init__11(self, array_type1, array_type2, caplog): original_data_input = array_type1(self.test_data_large) regression_data_input = array_type2(self.sample_points_large) - with pytest.warns(Warning): - PolyClass = PolynomialRegression( - original_data_input, regression_data_input, maximum_polynomial_order=11 - ) - assert PolyClass.max_polynomial_order == 10 + PolyClass = PolynomialRegression( + original_data_input, regression_data_input, maximum_polynomial_order=11 + ) + assert PolyClass.max_polynomial_order == 10 + assert logs_got_warning(caplog.records, "polynomial order") @pytest.mark.unit @pytest.mark.parametrize("array_type1", [np.array, pd.DataFrame]) @@ -2236,5 +2229,6 @@ def test_confint_regression_03(self): ) + if __name__ == "__main__": pytest.main() diff --git a/idaes/core/surrogate/pysmo/tests/test_radial_basis_function.py b/idaes/core/surrogate/pysmo/tests/test_radial_basis_function.py index 24e0dccfa0..5ac807dd1c 100644 --- a/idaes/core/surrogate/pysmo/tests/test_radial_basis_function.py +++ b/idaes/core/surrogate/pysmo/tests/test_radial_basis_function.py @@ -23,6 +23,7 @@ import pandas as pd from scipy.spatial import distance import pytest +from idaes.core.surrogate.pysmo.tests import logs_got_warning class TestFeatureScaling: @@ -1923,7 +1924,7 @@ def test_rbf_training_01(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test_rbf_training_02(self, array_type): + def test_rbf_training_02(self, array_type, caplog): input_array = array_type(self.test_data) data_feed = RadialBasisFunctions( input_array, @@ -1932,13 +1933,14 @@ def test_rbf_training_02(self, array_type): regularization=False, ) data_feed.training() - with pytest.warns(Warning): - results = data_feed.training() - assert data_feed.solution_status == "unstable solution" + + results = data_feed.training() + assert data_feed.solution_status == "unstable solution" + assert logs_got_warning(caplog.records) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test_rbf_training_03(self, array_type): + def test_rbf_training_03(self, array_type, caplog): input_array = array_type(self.test_data) data_feed = RadialBasisFunctions( input_array, @@ -1947,9 +1949,9 @@ def test_rbf_training_03(self, array_type): regularization=False, ) data_feed.training() - with pytest.warns(Warning): - data_feed.training() - assert data_feed.solution_status == "unstable solution" + data_feed.training() + assert data_feed.solution_status == "unstable solution" + assert logs_got_warning(caplog.records) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) diff --git a/idaes/core/surrogate/pysmo/utils.py b/idaes/core/surrogate/pysmo/utils.py index 422df31561..eaf91bbdd6 100644 --- a/idaes/core/surrogate/pysmo/utils.py +++ b/idaes/core/surrogate/pysmo/utils.py @@ -29,6 +29,9 @@ __author__ = "Oluwamayowa Amusat, John Siirola" +from datetime import datetime +import os + from pyomo.core import expr as EXPR, native_types from pyomo.core.expr.numvalue import value @@ -98,3 +101,17 @@ def beforeChild(self, node, child, child_idx): # Assume everything else is a constant... 
# return False, value(child) + + +def date_versioned_filename(fname: str, usec: bool = False) -> str: + """Create versioned filename using current date. + If 'usec' is True, precision is microseconds, otherwise seconds. + """ + base_fname = os.path.splitext(fname)[0] + now = datetime.now() + if usec: + date_str = now.isoformat("_", "microseconds").replace(".", "_") + else: + date_str = now.isoformat("_", "seconds") + date_str = date_str.replace(":", "") + return f"{base_fname}_v_{date_str}.pickle" diff --git a/pytest.ini b/pytest.ini index ccd02e93f6..e05779e962 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,7 +1,6 @@ [pytest] addopts = --durations=100 --durations-min=2 -log_file = pytest.log -log_file_date_format = %Y-%m-%dT%H:%M:%S -log_file_format = %(asctime)s %(levelname)-7s <%(filename)s:%(lineno)d> %(message)s -log_file_level = INFO \ No newline at end of file +log_cli=false +log_file=pytest.log +log_file_level=WARNING From 1624e67b66d0fe2350fa6861213ebaf84eed23aa Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Tue, 11 Jun 2024 10:26:23 -0700 Subject: [PATCH 08/11] radial basis tests pass --- .../surrogate/pysmo/radial_basis_function.py | 159 +++++----- idaes/core/surrogate/pysmo/sampling.py | 5 + .../pysmo/tests/test_radial_basis_function.py | 273 ++++++------------ 3 files changed, 159 insertions(+), 278 deletions(-) diff --git a/idaes/core/surrogate/pysmo/radial_basis_function.py b/idaes/core/surrogate/pysmo/radial_basis_function.py index 9be5dc5249..555da40eaa 100644 --- a/idaes/core/surrogate/pysmo/radial_basis_function.py +++ b/idaes/core/surrogate/pysmo/radial_basis_function.py @@ -190,13 +190,23 @@ class RadialBasisFunctions: """ + #: known solution methods for argument 'solution_method' + SOLUTION_METHODS = { + "algebraic": "ALGEBRAIC", + "bfgs": "BFGS", + "pyomo": "Pyomo optimization", + } + + #: known basis functions, see docs for details + BASIS_FUNCTIONS = {"linear", "cubic", "gaussian", "mq", "imq", "spline"} + def __init__( self, XY_data, - basis_function=None, - solution_method=None, - regularization=None, - fname=None, + basis_function: str = "gaussian", + solution_method: str = "algebraic", + regularization: bool = True, + fname: str = "solution.pickle", overwrite=False, ): r""" @@ -252,33 +262,24 @@ def __init__( >>> d = RadialBasisFunctions(XY_data, basis_function='gaussian') """ + _log.info(f"RadialBasisFunctions constructor:begin") if not isinstance(overwrite, bool): - # PYLINT-TODO - # pylint: disable-next=broad-exception-raised - raise Exception("overwrite must be boolean.") + self._bad_arg(overwrite, "overwrite", "must be boolean", type_error=True) self.overwrite = overwrite - if fname is None: - fname = "solution.pickle" - self.filename = "solution.pickle" - elif ( - not isinstance(fname, str) - or os.path.splitext(fname)[-1].lower() != ".pickle" - ): - # PYLINT-TODO - # pylint: disable-next=broad-exception-raised - raise Exception( - 'fname must be a string with extension ".pickle". Please correct.' 
- ) - if ( - os.path.exists(fname) and overwrite is True - ): # Explicit overwrite done by user - _log.warning(f"file '{fname}' exists; " - f"previous file will be overwritten") + + if not isinstance(fname, str): + self._bad_arg(fname, "fname", "must be a string", type_error=True) + if not fname.endswith(".pickle"): + self._bad_arg(fname, "fname", "must have extension '.pickle'") + # Explicit overwrite done by user + if os.path.exists(fname) and overwrite is True: + _log.warning(f"file '{fname}' exists, previous file will be overwritten") self.filename = fname elif os.path.exists(fname) and overwrite is False: # User is not overwriting self.filename = date_versioned_filename(fname) - _log.warning(f"'{fname}' exists, results will be saved " - f"to '{self.filename}'") + _log.warning( + f"'{fname}' exists, results will be saved " f"to '{self.filename}'" + ) # self.filename = 'solution.pickle' elif os.path.exists(fname) is False: self.filename = fname @@ -302,65 +303,34 @@ def __init__( self.y_data = y_data_scaled.reshape(self.y_data_unscaled.shape) self.centres = xy_data_scaled[:, :-1] - if solution_method is None: - solution_method = "algebraic" - self.solution_method = solution_method - print("Default parameter estimation method is used.") - elif not isinstance(solution_method, str): - # PYLINT-TODO - # pylint: disable-next=broad-exception-raised - raise Exception("Invalid solution method. Must be of type .") - elif ( - (solution_method.lower() == "algebraic") - or (solution_method.lower() == "pyomo") - or (solution_method.lower() == "bfgs") - ): - solution_method = solution_method.lower() - self.solution_method = solution_method - else: - # PYLINT-TODO - # pylint: disable-next=broad-exception-raised - raise Exception( - 'Invalid solution method entered. Select one of ALGEBRAIC (solution_method="algebraic") , L-BFGS (solution_method="bfgs") or Pyomo optimization (solution_method="pyomo") methods. ' + meth, aname = solution_method, "solution_method" + if not isinstance(meth, str): + self._bad_arg(meth, aname, "must be a string", type_error=True) + meth = meth.lower() + if meth not in self.SOLUTION_METHODS: + method_list = ", ".join( + [f"'{k}'={v}" for k, v in self.SOLUTION_METHODS.items()] ) - print("\nParameter estimation method: ", self.solution_method) - - if basis_function is None: - basis_function = "gaussian" - self.basis_function = basis_function - print("Gaussian basis function is used.") - elif not isinstance(basis_function, str): - # PYLINT-TODO - # pylint: disable-next=broad-exception-raised - raise Exception("Invalid basis_function. Must be of type .") - elif ( - (basis_function.lower() == "linear") - or (basis_function.lower() == "cubic") - or (basis_function.lower() == "gaussian") - or (basis_function.lower() == "mq") - or (basis_function.lower() == "imq") - or (basis_function.lower() == "spline") - ): - basis_function = basis_function.lower() - self.basis_function = basis_function - else: - # PYLINT-TODO - # pylint: disable-next=broad-exception-raised - raise Exception( - "Invalid basis function entered. See manual for available options. 
" + self._bad_arg(meth, aname, f"not in known methods: {method_list}") + self.solution_method = meth + _log.info(f"Parameter estimation method: {self.solution_method}") + + bfunc, aname = basis_function, "basis_function" + if not isinstance(bfunc, str): + self._bad_arg(bfunc, aname, "must be a string", type_error=True) + bfunc = bfunc.lower() + if bfunc not in self.BASIS_FUNCTIONS: + bfunc_list = ", ".join(list(self.BASIS_FUNCTIONS)) + self._bad_arg(bfunc, aname, f"not in known methods: {bfunc_list}") + self.basis_function = bfunc + _log.info(f"Basis function: {self.basis_function}") + + if not isinstance(regularization, bool): + self._bad_arg( + regularization, "regularization", "must be boolean", type_error=True ) - print("Basis function: ", self.basis_function) - - if regularization is None: - regularization = True - self.regularization = regularization - elif not isinstance(regularization, bool): - # PYLINT-TODO - # pylint: disable-next=broad-exception-raised - raise Exception("Invalid basis_function. Must be boolean") - elif (regularization is True) or (regularization is False): - self.regularization = regularization - print("Regularization done: ", self.regularization) + self.regularization = regularization + _log.info(f"Regularization done: {self.regularization}") # Results self.weights = None @@ -376,6 +346,20 @@ def __init__( self.y_data_max = None self.solution_status = None + @staticmethod + def _bad_arg(arg, name: str, why: str, type_error: bool = False, show: bool = True): + """Utility function to normalize raising of type and value errors + encountered during argument validation. + """ + _log.warning( + f"RadialBasisFunctions constructor:end " + f"status={'Type' if type_error else 'Value'}Error arg={name}" + ) + s = f"argument '{name}' ({arg}) {why}" if show else f"argument '{name}' {why}" + if type_error: + raise TypeError(s) + raise ValueError(s) + def r2_distance(self, c): """ The function r2_distance calculates Euclidean distance from the point or array c. @@ -1284,13 +1268,8 @@ def pickle_load(solution_file): solution_file : Pickle object file containing previous solution to be loaded. 
""" - try: - filehandler = open(solution_file, "rb") - return pickle.load(filehandler) - except: - # PYLINT-TODO - # pylint: disable-next=broad-exception-raised - raise Exception("File could not be loaded.") + filehandler = open(solution_file, "rb") + return pickle.load(filehandler) def parity_residual_plots(self): """ diff --git a/idaes/core/surrogate/pysmo/sampling.py b/idaes/core/surrogate/pysmo/sampling.py index 5e6f5d1d95..a94cef8920 100644 --- a/idaes/core/surrogate/pysmo/sampling.py +++ b/idaes/core/surrogate/pysmo/sampling.py @@ -21,8 +21,13 @@ import numpy as np import pandas as pd +from idaes.logger import getIdaesLogger + __author__ = "Oluwamayowa Amusat" +# Logging +_log = getIdaesLogger(__name__, tag="surrogate") + class FeatureScaling: """ diff --git a/idaes/core/surrogate/pysmo/tests/test_radial_basis_function.py b/idaes/core/surrogate/pysmo/tests/test_radial_basis_function.py index 5ac807dd1c..2abc4bf2b0 100644 --- a/idaes/core/surrogate/pysmo/tests/test_radial_basis_function.py +++ b/idaes/core/surrogate/pysmo/tests/test_radial_basis_function.py @@ -180,31 +180,29 @@ class TestRadialBasisFunction: @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__01(self, array_type): input_array = array_type(self.test_data) - RbfClass = RadialBasisFunctions( - input_array, basis_function=None, solution_method=None, regularization=None - ) - assert RbfClass.solution_method == "algebraic" - assert RbfClass.basis_function == "gaussian" - assert RbfClass.regularization == True + rbf_class = RadialBasisFunctions(input_array) + assert rbf_class.solution_method == "algebraic" + assert rbf_class.basis_function == "gaussian" + assert rbf_class.regularization is True @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__02(self, array_type): input_array = array_type(self.test_data) - RbfClass = RadialBasisFunctions( + rbf_class = RadialBasisFunctions( input_array, basis_function="LineaR", solution_method="PyoMo", regularization=False, ) - assert RbfClass.solution_method == "pyomo" - assert RbfClass.basis_function == "linear" - assert RbfClass.regularization == False + assert rbf_class.solution_method == "pyomo" + assert rbf_class.basis_function == "linear" + assert rbf_class.regularization == False @pytest.mark.unit def test__init__03(self): - with pytest.raises(Exception): - RbfClass = RadialBasisFunctions( + with pytest.raises((TypeError, ValueError)): + rbf_class = RadialBasisFunctions( [1, 2, 3, 4], basis_function="LineaR", solution_method="PyoMo", @@ -214,60 +212,50 @@ def test__init__03(self): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__04(self, array_type): - with pytest.raises(Exception): + with pytest.raises((TypeError, ValueError)): input_array = array_type(self.test_data) - RbfClass = RadialBasisFunctions( - input_array, basis_function=None, solution_method=1, regularization=None - ) + rbf_class = RadialBasisFunctions(input_array, solution_method=1) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__05(self, array_type): - with pytest.raises(Exception): + with pytest.raises((TypeError, ValueError)): input_array = array_type(self.test_data) - RbfClass = RadialBasisFunctions( + rbf_class = RadialBasisFunctions( input_array, - basis_function=None, solution_method="idaes", - regularization=None, ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__06(self, array_type): 
- with pytest.raises(Exception): + with pytest.raises((TypeError, ValueError)): input_array = array_type(self.test_data) - RbfClass = RadialBasisFunctions( - input_array, basis_function=1, solution_method=None, regularization=None - ) + rbf_class = RadialBasisFunctions(input_array, basis_function=1) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__07(self, array_type): - with pytest.raises(Exception): + with pytest.raises((TypeError, ValueError)): input_array = array_type(self.test_data) - RbfClass = RadialBasisFunctions( + RadialBasisFunctions( input_array, basis_function="idaes", - solution_method=None, - regularization=None, ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__08(self, array_type): - with pytest.raises(Exception): + with pytest.raises((TypeError, ValueError)): input_array = array_type(self.test_data) - RbfClass = RadialBasisFunctions( - input_array, basis_function=None, solution_method=None, regularization=1 - ) + RadialBasisFunctions(input_array, regularization=1) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__09(self, array_type): - with pytest.raises(Exception): + with pytest.raises((TypeError, ValueError)): input_array = array_type(self.test_data) - RbfClass = RadialBasisFunctions( + RadialBasisFunctions( input_array, basis_function="LineaR", solution_method="PyoMo", @@ -278,9 +266,9 @@ def test__init__09(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__10(self, array_type): - with pytest.raises(Exception): + with pytest.raises((TypeError, ValueError)): input_array = array_type(self.test_data) - RbfClass = RadialBasisFunctions( + rbf_class = RadialBasisFunctions( input_array, basis_function="LineaR", solution_method="PyoMo", @@ -291,9 +279,9 @@ def test__init__10(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__11(self, array_type): - with pytest.raises(Exception): + with pytest.raises((TypeError, ValueError)): input_array = array_type(self.test_data) - RbfClass = RadialBasisFunctions( + rbf_class = RadialBasisFunctions( input_array, basis_function="LineaR", solution_method="PyoMo", @@ -306,7 +294,7 @@ def test__init__11(self, array_type): def test__init__12(self, array_type): file_name = "test_filename.pickle" input_array = array_type(self.test_data) - RbfClass1 = RadialBasisFunctions( + rbf_class1 = RadialBasisFunctions( input_array, basis_function="LineaR", solution_method="PyoMo", @@ -314,9 +302,9 @@ def test__init__12(self, array_type): fname=file_name, overwrite=True, ) - p = RbfClass1.get_feature_vector() - results = RbfClass1.training() - RbfClass2 = RadialBasisFunctions( + p = rbf_class1.get_feature_vector() + results = rbf_class1.training() + rbf_class2 = RadialBasisFunctions( input_array, basis_function="LineaR", solution_method="PyoMo", @@ -324,7 +312,7 @@ def test__init__12(self, array_type): fname=file_name, overwrite=True, ) - assert RbfClass1.filename == RbfClass2.filename + assert rbf_class1.filename == rbf_class2.filename @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) @@ -332,7 +320,7 @@ def test__init__14(self, array_type): input_array = array_type(self.test_data) file_name1 = "test_filename1.pickle" file_name2 = "test_filename2.pickle" - RbfClass1 = RadialBasisFunctions( + rbf_class1 = RadialBasisFunctions( input_array, 
basis_function="LineaR", solution_method="PyoMo", @@ -340,9 +328,9 @@ def test__init__14(self, array_type): fname=file_name1, overwrite=True, ) - p = RbfClass1.get_feature_vector() - RbfClass1.training() - RbfClass2 = RadialBasisFunctions( + p = rbf_class1.get_feature_vector() + rbf_class1.training() + rbf_class2 = RadialBasisFunctions( input_array, basis_function="LineaR", solution_method="PyoMo", @@ -350,8 +338,8 @@ def test__init__14(self, array_type): fname=file_name2, overwrite=True, ) - assert RbfClass1.filename == file_name1 - assert RbfClass2.filename == file_name2 + assert rbf_class1.filename == file_name1 + assert rbf_class2.filename == file_name2 @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) @@ -527,27 +515,27 @@ def test_basis_generation(self, array_type): output_2 = data_feed_02.basis_generation(2) np.testing.assert_array_equal(expected_output_2, output_2) - # # Spline + # Spline data_feed_03 = RadialBasisFunctions(input_array[0:3], basis_function="spline") expected_output_3 = np.nan_to_num(distance_array**2 * np.log(distance_array)) output_3 = data_feed_03.basis_generation(2) np.testing.assert_array_equal(expected_output_3, output_3) - # # Gaussian + # Gaussian data_feed_04 = RadialBasisFunctions(input_array[0:3], basis_function="gaussian") shape_value = 2 expected_output_4 = np.exp(-1 * ((distance_array * shape_value) ** 2)) output_4 = data_feed_04.basis_generation(shape_value) np.testing.assert_array_equal(expected_output_4, output_4) - # # Multiquadric + # Multiquadric data_feed_05 = RadialBasisFunctions(input_array[0:3], basis_function="mq") shape_value = 2 expected_output_5 = np.sqrt(((distance_array * shape_value) ** 2) + 1) output_5 = data_feed_05.basis_generation(shape_value) np.testing.assert_array_equal(expected_output_5, output_5) - # # Inverse multiquadric + # Inverse multiquadric data_feed_06 = RadialBasisFunctions(input_array[0:3], basis_function="imq") shape_value = 2 expected_output_6 = 1 / np.sqrt(((distance_array * shape_value) ** 2) + 1) @@ -557,124 +545,59 @@ def test_basis_generation(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) def test_cost_function_01(self, array_type): - input_array = array_type(self.training_data) - x = input_array[:, :-1] - y = input_array[:, -1] - x_data_nr = x.shape[0] - x_data_nc = 6 - x_vector = np.zeros((x_data_nr, x_data_nc)) - x_vector[:, 0] = 1 - x_vector[:, 1] = x[:, 0] - x_vector[:, 2] = x[:, 1] - x_vector[:, 3] = x[:, 0] ** 2 - x_vector[:, 4] = x[:, 1] ** 2 - x_vector[:, 5] = x[:, 0] * x[:, 1] + x_vector, y, x_data_nc = self.cost_function_common(array_type) theta = np.zeros((x_data_nc, 1)) - expected_value = 6613.875 - output_1 = RadialBasisFunctions.cost_function(theta, x_vector, y) - assert output_1 == expected_value + assert 6613.875 == RadialBasisFunctions.cost_function(theta, x_vector, y) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) def test_cost_function_02(self, array_type): - input_array = array_type(self.training_data) - x = input_array[:, :-1] - y = input_array[:, -1] - x_data_nr = x.shape[0] - x_data_nc = 6 - x_vector = np.zeros((x_data_nr, x_data_nc)) - x_vector[:, 0] = 1 - x_vector[:, 1] = x[:, 0] - x_vector[:, 2] = x[:, 1] - x_vector[:, 3] = x[:, 0] ** 2 - x_vector[:, 4] = x[:, 1] ** 2 - x_vector[:, 5] = x[:, 0] * x[:, 1] + x_vector, y, x_data_nc = self.cost_function_common(array_type) theta = np.array([[4.5], [3], [3], [1], [1], [0]]) - expected_value = 90.625 # Calculated externally as sum(dy^2) / 2m - 
output_1 = RadialBasisFunctions.cost_function(theta, x_vector, y) - assert output_1 == expected_value + # Calculated externally as sum(dy^2) / 2m + assert 90.625 == RadialBasisFunctions.cost_function(theta, x_vector, y) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) def test_cost_function_03(self, array_type): - input_array = array_type(self.training_data) - x = input_array[:, :-1] - y = input_array[:, -1] - x_data_nr = x.shape[0] - x_data_nc = 6 - x_vector = np.zeros((x_data_nr, x_data_nc)) - x_vector[:, 0] = 1 - x_vector[:, 1] = x[:, 0] - x_vector[:, 2] = x[:, 1] - x_vector[:, 3] = x[:, 0] ** 2 - x_vector[:, 4] = x[:, 1] ** 2 - x_vector[:, 5] = x[:, 0] * x[:, 1] + x_vector, y, x_data_nc = self.cost_function_common(array_type) theta = np.array([[2], [2], [2], [1], [1], [0]]) - expected_value = 0 - output_1 = RadialBasisFunctions.cost_function(theta, x_vector, y) - assert output_1 == expected_value + assert 0 == RadialBasisFunctions.cost_function(theta, x_vector, y) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) def test_gradient_function_01(self, array_type): - input_array = array_type(self.training_data) - x = input_array[:, :-1] - y = input_array[:, -1] - x_data_nr = x.shape[0] - x_data_nc = 6 - x_vector = np.zeros((x_data_nr, x_data_nc)) - x_vector[:, 0] = 1 - x_vector[:, 1] = x[:, 0] - x_vector[:, 2] = x[:, 1] - x_vector[:, 3] = x[:, 0] ** 2 - x_vector[:, 4] = x[:, 1] ** 2 - x_vector[:, 5] = x[:, 0] * x[:, 1] + x_vector, y, x_data_nc = self.gradient_function_common(array_type) theta = np.zeros((x_data_nc,)) - expected_value = np.array( - [[-97], [-635], [-635], [-5246.875], [-5246.875], [-3925]] - ) - expected_value = expected_value.reshape( - expected_value.shape[0], - ) - output_1 = RadialBasisFunctions.gradient_function(theta, x_vector, y) - np.testing.assert_equal(output_1, expected_value) + ev = np.array([[-97], [-635], [-635], [-5246.875], [-5246.875], [-3925]]) + self.gradient_function_compare(ev, theta, x_vector, y) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) def test_gradient_function_02(self, array_type): - input_array = array_type(self.training_data) - x = input_array[:, :-1] - y = input_array[:, -1] - x_data_nr = x.shape[0] - x_data_nc = 6 - x_vector = np.zeros((x_data_nr, x_data_nc)) - x_vector[:, 0] = 1 - x_vector[:, 1] = x[:, 0] - x_vector[:, 2] = x[:, 1] - x_vector[:, 3] = x[:, 0] ** 2 - x_vector[:, 4] = x[:, 1] ** 2 - x_vector[:, 5] = x[:, 0] * x[:, 1] - theta = np.array( - [[4.5], [3], [3], [1], [1], [0]] - ) # coefficients in (x1 + 1.5)^2 + (x2 + 1.5) ^ 2 - theta = theta.reshape( - theta.shape[0], - ) - expected_value = np.array( - [[12.5], [75], [75], [593.75], [593.75], [437.5]] - ) # Calculated externally: see Excel sheet - expected_value = expected_value.reshape( - expected_value.shape[0], - ) - output_1 = RadialBasisFunctions.gradient_function(theta, x_vector, y) - np.testing.assert_equal(output_1, expected_value) + x_vector, y, _ = self.gradient_function_common(array_type) + # coefficients in (x1 + 1.5)^2 + (x2 + 1.5) ^ 2 + theta = np.array([[4.5], [3], [3], [1], [1], [0]]) + theta = theta.reshape(theta.shape[0]) + # Calculated externally: see Excel sheet + ev = np.array([[12.5], [75], [75], [593.75], [593.75], [437.5]]) + self.gradient_function_compare(ev, theta, x_vector, y) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) def test_gradient_function_03(self, array_type): - input_array = array_type(self.training_data) - x = input_array[:, :-1] - y = input_array[:, -1] + 
x_vector, y, _ = self.gradient_function_common(array_type) + # Actual coefficients in (x1 + 1)^2 + (x2 + 1) ^ 2 + theta = np.array([[2], [2], [2], [1], [1], [0]]) + theta = theta.reshape(theta.shape[0]) + ev = np.array([[0], [0], [0], [0], [0], [0]]) + self.gradient_function_compare(ev, theta, x_vector, y) + + @classmethod + def gradient_function_common(cls, arr_type): + arr = arr_type(cls.training_data) + x = arr[:, :-1] + y = arr[:, -1] x_data_nr = x.shape[0] x_data_nc = 6 x_vector = np.zeros((x_data_nr, x_data_nc)) @@ -684,20 +607,17 @@ def test_gradient_function_03(self, array_type): x_vector[:, 3] = x[:, 0] ** 2 x_vector[:, 4] = x[:, 1] ** 2 x_vector[:, 5] = x[:, 0] * x[:, 1] - theta = np.array( - [[2], [2], [2], [1], [1], [0]] - ) # Actual coefficients in (x1 + 1)^2 + (x2 + 1) ^ 2 - theta = theta.reshape( - theta.shape[0], - ) - expected_value = np.array( - [[0], [0], [0], [0], [0], [0]] - ) # Calculated externally: see Excel sheet - expected_value = expected_value.reshape( - expected_value.shape[0], - ) - output_1 = RadialBasisFunctions.gradient_function(theta, x_vector, y) - np.testing.assert_equal(output_1, expected_value) + return x_vector, y, x_data_nc + + @classmethod + def cost_function_common(cls, arr_type): + return cls.gradient_function_common(arr_type) + + @staticmethod + def gradient_function_compare(expected_value, *args): + expected_value = expected_value.reshape(expected_value.shape[0]) + result = RadialBasisFunctions.gradient_function(*args) + np.testing.assert_equal(result, expected_value) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) @@ -1027,9 +947,7 @@ def test_loo_error_estimation_with_rippa_method_03(self, array_type): @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_leave_one_out_crossvalidation_01(self, array_type): input_array = array_type(self.training_data) - data_feed = RadialBasisFunctions( - input_array, basis_function=None, solution_method=None, regularization=False - ) + data_feed = RadialBasisFunctions(input_array, regularization=False) r_best, lambda_best, error_best = data_feed.leave_one_out_crossvalidation() if ( (data_feed.basis_function == "gaussian") @@ -1104,7 +1022,6 @@ def test_leave_one_out_crossvalidation_02(self, array_type): data_feed = RadialBasisFunctions( input_array, basis_function="cubic", - solution_method=None, regularization=False, ) r_best, lambda_best, error_best = data_feed.leave_one_out_crossvalidation() @@ -1181,7 +1098,6 @@ def test_leave_one_out_crossvalidation_03(self, array_type): data_feed = RadialBasisFunctions( input_array, basis_function="linear", - solution_method=None, regularization=False, ) r_best, lambda_best, error_best = data_feed.leave_one_out_crossvalidation() @@ -1258,7 +1174,6 @@ def test_leave_one_out_crossvalidation_04(self, array_type): data_feed = RadialBasisFunctions( input_array, basis_function="spline", - solution_method=None, regularization=False, ) r_best, lambda_best, error_best = data_feed.leave_one_out_crossvalidation() @@ -1335,7 +1250,6 @@ def test_leave_one_out_crossvalidation_05(self, array_type): data_feed = RadialBasisFunctions( input_array, basis_function="gaussian", - solution_method=None, regularization=False, ) r_best, lambda_best, error_best = data_feed.leave_one_out_crossvalidation() @@ -1410,7 +1324,7 @@ def test_leave_one_out_crossvalidation_05(self, array_type): def test_leave_one_out_crossvalidation_06(self, array_type): input_array = array_type(self.training_data) data_feed = RadialBasisFunctions( - input_array, 
basis_function="mq", solution_method=None, regularization=False + input_array, basis_function="mq", regularization=False ) r_best, lambda_best, error_best = data_feed.leave_one_out_crossvalidation() if ( @@ -1486,7 +1400,6 @@ def test_leave_one_out_crossvalidation_07(self, array_type): data_feed = RadialBasisFunctions( input_array, basis_function="imq", - solution_method=None, regularization=False, ) r_best, lambda_best, error_best = data_feed.leave_one_out_crossvalidation() @@ -1562,7 +1475,6 @@ def test_leave_one_out_crossvalidation_08(self, array_type): input_array = array_type(self.training_data) data_feed = RadialBasisFunctions( input_array, - basis_function=None, solution_method="algebraic", regularization=False, ) @@ -1639,7 +1551,6 @@ def test_leave_one_out_crossvalidation_09(self, array_type): input_array = array_type(self.training_data) data_feed = RadialBasisFunctions( input_array, - basis_function=None, solution_method="BFGS", regularization=False, ) @@ -1716,7 +1627,6 @@ def test_leave_one_out_crossvalidation_10(self, array_type): input_array = array_type(self.training_data) data_feed = RadialBasisFunctions( input_array, - basis_function=None, solution_method="pyomo", regularization=False, ) @@ -1791,9 +1701,7 @@ def test_leave_one_out_crossvalidation_10(self, array_type): @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_leave_one_out_crossvalidation_11(self, array_type): input_array = array_type(self.training_data) - data_feed = RadialBasisFunctions( - input_array, basis_function=None, solution_method=None, regularization=True - ) + data_feed = RadialBasisFunctions(input_array, regularization=True) r_best, lambda_best, error_best = data_feed.leave_one_out_crossvalidation() if ( (data_feed.basis_function == "gaussian") @@ -1867,7 +1775,6 @@ def test_rbf_training_01(self, array_type): input_array = array_type(self.test_data) data_feed = RadialBasisFunctions( input_array, - basis_function=None, solution_method="algebraic", regularization=False, ) @@ -1928,7 +1835,6 @@ def test_rbf_training_02(self, array_type, caplog): input_array = array_type(self.test_data) data_feed = RadialBasisFunctions( input_array, - basis_function=None, solution_method="pyomo", regularization=False, ) @@ -1944,7 +1850,6 @@ def test_rbf_training_03(self, array_type, caplog): input_array = array_type(self.test_data) data_feed = RadialBasisFunctions( input_array, - basis_function=None, solution_method="bfgs", regularization=False, ) @@ -2124,7 +2029,6 @@ def test_rbf_generate_expression_01(self, array_type): data_feed = RadialBasisFunctions( input_array, basis_function="linear", - solution_method=None, regularization=False, ) p = data_feed.get_feature_vector() @@ -2141,7 +2045,6 @@ def test_rbf_generate_expression_02(self, array_type): data_feed = RadialBasisFunctions( input_array, basis_function="cubic", - solution_method=None, regularization=False, ) p = data_feed.get_feature_vector() @@ -2158,7 +2061,6 @@ def test_rbf_generate_expression_03(self, array_type): data_feed = RadialBasisFunctions( input_array, basis_function="gaussian", - solution_method=None, regularization=False, ) p = data_feed.get_feature_vector() @@ -2173,7 +2075,7 @@ def test_rbf_generate_expression_03(self, array_type): def test_rbf_generate_expression_04(self, array_type): input_array = array_type(self.training_data) data_feed = RadialBasisFunctions( - input_array, basis_function="mq", solution_method=None, regularization=False + input_array, basis_function="mq", regularization=False ) p = 
data_feed.get_feature_vector() results = data_feed.training() @@ -2189,7 +2091,6 @@ def test_rbf_generate_expression_05(self, array_type): data_feed = RadialBasisFunctions( input_array, basis_function="imq", - solution_method=None, regularization=False, ) p = data_feed.get_feature_vector() @@ -2206,7 +2107,6 @@ def test_rbf_generate_expression_06(self, array_type): data_feed = RadialBasisFunctions( input_array, basis_function="spline", - solution_method=None, regularization=False, ) p = data_feed.get_feature_vector() @@ -2223,7 +2123,6 @@ def test_pickle_load01(self, array_type): data_feed = RadialBasisFunctions( input_array, basis_function="spline", - solution_method=None, regularization=False, ) p = data_feed.get_feature_vector() @@ -2237,7 +2136,6 @@ def test_pickle_load02(self, array_type): data_feed = RadialBasisFunctions( input_array, basis_function="spline", - solution_method=None, regularization=False, ) p = data_feed.get_feature_vector() @@ -2253,7 +2151,6 @@ def test_parity_residual_plots(self, mock_show, array_type): data_feed = RadialBasisFunctions( input_array, basis_function="spline", - solution_method=None, regularization=False, ) p = data_feed.get_feature_vector() From 73c27f1b79dfd67e3ed92cf715baac34f750ad4d Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Tue, 11 Jun 2024 10:31:02 -0700 Subject: [PATCH 09/11] radial basis tests pass --- .../pysmo/tests/test_radial_basis_function.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/idaes/core/surrogate/pysmo/tests/test_radial_basis_function.py b/idaes/core/surrogate/pysmo/tests/test_radial_basis_function.py index 2abc4bf2b0..32268f5de8 100644 --- a/idaes/core/surrogate/pysmo/tests/test_radial_basis_function.py +++ b/idaes/core/surrogate/pysmo/tests/test_radial_basis_function.py @@ -176,6 +176,22 @@ class TestRadialBasisFunction: sample_points_1d = [[(i + 1) ** 2] for i in range(8)] sample_points_3d = [[i, (i + 1) ** 2, (i + 2) ** 2] for i in range(8)] + @pytest.mark.unit + def test_constructor_nones(self): + """For some reason, the original interface didn't provide default values + for keyword arguments, but instead explicitly tested for None in the body + of the constructor. This may have (mis)led people to explicitly pass None + as a value instead of not including keywords when they wanted the default value. + Since there are also explicit type-checks, fixing this behavior to be more normal + will break such code without keeping an explicit check for None. + + So, this test makes sure the old `kw=None` interface still works until + such time as sanity prevails. 
+        """
+        input_array = pd.DataFrame(self.test_data)
+        rbf = RadialBasisFunctions(input_array, basis_function=None, solution_method=None,
+                                   regularization=None)
+
     @pytest.mark.unit
     @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame])
     def test__init__01(self, array_type):

From 155a85fd67e02c31301aa1c5d3db696603b2b546 Mon Sep 17 00:00:00 2001
From: Dan Gunter
Date: Tue, 11 Jun 2024 16:36:47 -0700
Subject: [PATCH 10/11] improvements

---
 .../surrogate/pysmo/radial_basis_function.py  | 72 +++++++++++--------
 .../pysmo/tests/test_radial_basis_function.py | 32 ++++++---
 2 files changed, 65 insertions(+), 39 deletions(-)

diff --git a/idaes/core/surrogate/pysmo/radial_basis_function.py b/idaes/core/surrogate/pysmo/radial_basis_function.py
index 555da40eaa..2f68635546 100644
--- a/idaes/core/surrogate/pysmo/radial_basis_function.py
+++ b/idaes/core/surrogate/pysmo/radial_basis_function.py
@@ -14,6 +14,7 @@
 
 # Imports from the python standard library
 import os.path
+from pathlib import Path
 import pickle
 
 # Imports from third parties
@@ -200,6 +201,10 @@ class RadialBasisFunctions:
     #: known basis functions, see docs for details
     BASIS_FUNCTIONS = {"linear", "cubic", "gaussian", "mq", "imq", "spline"}
 
+    #: Set this if you want all instances producing output in a directory
+    #: other than the current working directory
+    output_dir: Path = None
+
     def __init__(
         self,
         XY_data,
@@ -217,6 +222,7 @@ def __init__(
             XY_data (Numpy Array or Pandas Dataframe): The dataset for RBF training. **XY_data** is expected to contain feature and output information, with the output values (y) in the last column.
 
         Keyword Args:
+
             basis_function(str): The basis function transformation to be applied to the training data. Two classes of basis transformations are available for selection:
 
                 - Fixed basis transformations, which require no shape parameter :math:`\sigma` :
@@ -241,18 +247,9 @@ def __init__(
 
             regularization(bool): This option determines whether or not the regularization parameter :math:`\lambda` is considered during RBF fitting. Default setting is True.
 
-        Returns:
-            **self** object with the input information
-
         Raises:
-            ValueError: The input dataset is of the wrong type (not a NumPy array or Pandas Dataframe)
+            TypeError or ValueError: The input dataset is of the wrong type (not a NumPy array or Pandas Dataframe)
 
-        Exception:
-            * **basis_function** entry is not valid.
-        Exception:
-            * **solution_method** is not 'algebraic', 'pyomo' or 'bfgs'.
-        Exception:
-            - :math:`\lambda` is not boolean.
**Example:** @@ -260,9 +257,14 @@ def __init__( # Specify the gaussian basis transformation >>> d = RadialBasisFunctions(XY_data, basis_function='gaussian') + >>> e = RadialBasisFunctions(XY_data) # equivalent to above, since gaussian is default """ _log.info(f"RadialBasisFunctions constructor:begin") + + # Use class-wide output directory or current directory if none is defined + output_dir = self.output_dir or Path(".") + if not isinstance(overwrite, bool): self._bad_arg(overwrite, "overwrite", "must be boolean", type_error=True) self.overwrite = overwrite @@ -272,17 +274,21 @@ def __init__( if not fname.endswith(".pickle"): self._bad_arg(fname, "fname", "must have extension '.pickle'") # Explicit overwrite done by user - if os.path.exists(fname) and overwrite is True: - _log.warning(f"file '{fname}' exists, previous file will be overwritten") - self.filename = fname - elif os.path.exists(fname) and overwrite is False: # User is not overwriting - self.filename = date_versioned_filename(fname) - _log.warning( - f"'{fname}' exists, results will be saved " f"to '{self.filename}'" - ) - # self.filename = 'solution.pickle' - elif os.path.exists(fname) is False: - self.filename = fname + full_path = output_dir / fname + if full_path.exists(): + if overwrite: + _log.warning( + f"file '{full_path}' exists, previous file will be overwritten" + ) + self.filename = str(full_path) + else: # User is not overwriting + self.filename = date_versioned_filename(str(full_path)) + _log.warning( + f"'{full_path}' exists, results will be saved " + f"to '{self.filename}'" + ) + else: + self.filename = str(full_path) # Check data types and shapes if isinstance(XY_data, pd.DataFrame): @@ -1163,7 +1169,8 @@ def generate_expression(self, variable_list): """ t1 = np.array([variable_list], dtype="object") - # Reshaping of variable array is necessary when input variables are Pyomo scalar variables + # Reshaping of variable array is necessary when input variables are + # Pyomo scalar variables t1 = t1.reshape(1, len(variable_list)) if t1.ndim > 2 else t1 basis_vector = [] @@ -1221,19 +1228,23 @@ def generate_expression(self, variable_list): def get_feature_vector(self): """ - The ``get_feature_vector`` method generates the list of regression features from the column headers of the input dataset. + The ``get_feature_vector`` method generates the list of regression features + from the column headers of the input dataset. Returns: - Pyomo IndexedParam : An indexed parameter list of the variables supplied in the original data + Pyomo IndexedParam : An indexed parameter list of the variables supplied + in the original data **Example:** .. code-block:: python - # Create a small dataframe with three columns ('one', 'two', 'three') and two rows (A, B) + # Create a small dataframe with three columns ('one', 'two', 'three') + # and two rows (A, B) >>> xy_data = pd.DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], orient='index', columns=['one', 'two', 'three']) - # Initialize the **RadialBasisFunctions** class with a linear kernel and print the column headers for the variables + # Initialize the **RadialBasisFunctions** class with a linear kernel + # and print the column headers for the variables >>> f = RadialBasisFunctions(xy_data, basis_function='linear') >>> p = f.get_feature_vector() >>> for i in p.keys(): @@ -1261,12 +1272,11 @@ def pickle_save(self, solutions): @staticmethod def pickle_load(solution_file): - """ - pickle_load loads the results of a saved run 'file.obj'. 
-
-        Input arguments:
-                solution_file            : Pickle object file containing previous solution to be loaded.
-
+        """Loads the results of a saved run 'file.obj'.
+
+        Args:
+            solution_file: Pickle object file containing previous solution
+                to be loaded.
         """
         filehandler = open(solution_file, "rb")
         return pickle.load(filehandler)
 
diff --git a/idaes/core/surrogate/pysmo/tests/test_radial_basis_function.py b/idaes/core/surrogate/pysmo/tests/test_radial_basis_function.py
index 32268f5de8..e0f0d648f4 100644
--- a/idaes/core/surrogate/pysmo/tests/test_radial_basis_function.py
+++ b/idaes/core/surrogate/pysmo/tests/test_radial_basis_function.py
@@ -13,6 +13,8 @@
 import sys
 import os
 from unittest.mock import patch
+from tempfile import TemporaryDirectory
+from pathlib import Path
 
 sys.path.append(os.path.abspath(".."))  # current folder is ~/tests
 from idaes.core.surrogate.pysmo.radial_basis_function import (
@@ -26,6 +28,11 @@
 from idaes.core.surrogate.pysmo.tests import logs_got_warning
 
 
+# Set a temporary directory for output
+_tmpdir = TemporaryDirectory()
+RadialBasisFunctions.output_dir = Path(_tmpdir.name)
+
+
 class TestFeatureScaling:
     test_data_1d = [[x] for x in range(10)]
     test_data_2d = [[x, (x + 1) ** 2] for x in range(10)]
@@ -154,6 +161,7 @@ def test_data_unscaling_minmax_06(self, array_type):
 
 
 class TestRadialBasisFunction:
+
     y = np.array(
         [
             [i, j, ((i + 1) ** 2) + ((j + 1) ** 2)]
@@ -182,15 +190,22 @@ def test_constructor_nones(self):
         for keyword arguments, but instead explicitly tested for None in the body
         of the constructor. This may have (mis)led people to explicitly pass None
         as a value instead of not including keywords when they wanted the default value.
-        Since there are also explicit type-checks, fixing this behavior to be more normal
-        will break such code without keeping an explicit check for None.
+        Since there are also explicit type-checks, passing None for something that
+        should be a bool or string will fail with a TypeError.
 
-        So, this test makes sure the old `kw=None` interface still works until
-        such time as sanity prevails.
+        In other words, don't do this! If you want the default value, omit the keyword.
""" input_array = pd.DataFrame(self.test_data) - rbf = RadialBasisFunctions(input_array, basis_function=None, solution_method=None, - regularizatio=None) + with pytest.raises(TypeError): + RadialBasisFunctions(input_array, basis_function=None) + with pytest.raises(TypeError): + RadialBasisFunctions(input_array, solution_method=None) + with pytest.raises(TypeError): + RadialBasisFunctions(input_array, regularization=None) + with pytest.raises(TypeError): + RadialBasisFunctions(input_array, fname=None) + with pytest.raises(TypeError): + RadialBasisFunctions(input_array, overwrite=None) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) @@ -354,8 +369,9 @@ def test__init__14(self, array_type): fname=file_name2, overwrite=True, ) - assert rbf_class1.filename == file_name1 - assert rbf_class2.filename == file_name2 + # due to output_dir, compare basename instead of full path + assert os.path.basename(rbf_class1.filename) == file_name1 + assert os.path.basename(rbf_class2.filename) == file_name2 @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) From 220035ad9fa77128a657f0e5e6b569a7a47aa8d2 Mon Sep 17 00:00:00 2001 From: Dan Gunter Date: Tue, 11 Jun 2024 21:37:30 -0700 Subject: [PATCH 11/11] some more changes --- .../surrogate/pysmo/polynomial_regression.py | 3 +- idaes/core/surrogate/pysmo/sampling.py | 89 +++++++------------ .../surrogate/pysmo/tests/test_sampling.py | 1 + idaes/core/surrogate/pysmo_surrogate.py | 16 +--- 4 files changed, 38 insertions(+), 71 deletions(-) diff --git a/idaes/core/surrogate/pysmo/polynomial_regression.py b/idaes/core/surrogate/pysmo/polynomial_regression.py index c7dc0b3540..a6c941176e 100644 --- a/idaes/core/surrogate/pysmo/polynomial_regression.py +++ b/idaes/core/surrogate/pysmo/polynomial_regression.py @@ -380,9 +380,10 @@ def __init__( self._bad_arg(max_iter, aname, "must be positive") self.max_iter = max_iter - meth, aname = solution_method.lower(), "solution_method" + meth, aname = solution_method, "solution_method" if not isinstance(meth, str): self._bad_arg(meth, aname, "must be a string", type_error=True) + meth = meth.lower() if meth not in self.SOLUTION_METHODS: method_list = ", ".join( [f"'{k}'={v}" for k, v in self.SOLUTION_METHODS.items()] diff --git a/idaes/core/surrogate/pysmo/sampling.py b/idaes/core/surrogate/pysmo/sampling.py index a94cef8920..3c73bcf627 100644 --- a/idaes/core/surrogate/pysmo/sampling.py +++ b/idaes/core/surrogate/pysmo/sampling.py @@ -10,12 +10,7 @@ # All rights reserved. Please see the files COPYRIGHT.md and LICENSE.md # for full copyright and license information. ################################################################################# -# TODO: Missing doc strings -# pylint: disable=missing-module-docstring -# pylint: disable=missing-class-docstring -# pylint: disable=missing-function-docstring -import warnings import itertools import numpy as np @@ -79,9 +74,7 @@ def data_scaling_minmax(data): @staticmethod def data_unscaling_minmax(x_scaled, x_min, x_max): - """ - - This function performs column-wise un-scaling on the a minmax-scaled input dataset. + """Perform column-wise un-scaling on the a minmax-scaled input dataset. Args: x_scaled(NumPy Array): The input data set to be un-scaled. Data values should be between 0 and 1. 
@@ -102,27 +95,6 @@ def data_unscaling_minmax(x_scaled, x_min, x_max):
         unscaled_data = x_min + x_scaled * (x_max - x_min)
         return unscaled_data
 
-    # @staticmethod
-    # def data_scaling_standardization(data):
-    #     # Confirm that data type is an array or DataFrame
-    #     if isinstance(data, np.ndarray):
-    #         input_data = data
-    #     elif isinstance(data, pd.DataFrame):
-    #         input_data = data.values
-    #     else:
-    #         raise TypeError('original_data_input: Pandas dataframe or numpy array required.')
-    #
-    #     if input_data.ndim == 1:
-    #         input_data = input_data.reshape(len(input_data), 1)
-    #
-    #     data_mean = np.mean(input_data, axis=0)
-    #     data_stdev = np.std(input_data, axis=0)
-    #     scaled_data = (input_data - data_mean) / data_stdev
-    #     data_mean = data_mean.reshape(1, data_mean.shape[0])
-    #     data_stdev = data_stdev.reshape(1, data_stdev.shape[0])
-    #     return scaled_data, data_mean, data_stdev
-
-
 class SamplingMethods:
     def nearest_neighbour(self, full_data, a):
         """
@@ -185,12 +157,14 @@ def sample_point_selection(self, full_data, sample_points, sampling_type):
             unique_sample_points = np.unique(points_closest_unscaled, axis=0)
             if unique_sample_points.shape[0] < points_closest_unscaled.shape[0]:
-                warnings.warn(
-                    "The returned number of samples is less than the requested number due to repetitions during nearest neighbour selection."
+                _log.warning(
+                    "The returned number of samples is less than "
+                    "the requested number due to repetitions during "
+                    "nearest neighbour selection."
                 )
-                print(
-                    "\nNumber of unique samples returned by sampling algorithm:",
-                    unique_sample_points.shape[0],
+                _log.info(
+                    f"Number of unique samples returned by sampling algorithm: "
+                    f"{unique_sample_points.shape[0]}"
                 )
 
         elif sampling_type == "creation":
@@ -393,7 +367,7 @@ def selection_columns_preprocessing(self, data_input, xlabels, ylabels):
                     warn_str = "The following columns were dropped: " + str(
                         dropped_cols
                     )
-                    warnings.warn(warn_str)
+                    _log.warning(warn_str)
                     self.x_data = data_input.filter(xlabels).values
                     self.data_headers = set_of_labels
                     self.data_headers_xvars = xlabels
@@ -453,7 +427,7 @@ def selection_columns_preprocessing(self, data_input, xlabels, ylabels):
                     warn_str = "The following columns were dropped: " + str(
                         dropped_cols
                     )
-                    warnings.warn(warn_str)
+                    _log.warning(warn_str)
                     self.x_data = data_input[:, xlabels]
                     self.data_headers = set_of_labels
                     self.data_headers_xvars = xlabels
@@ -1033,10 +1007,10 @@ def __init__(
             self.x_data = bounds_array  # Only x data will be present in this case
 
         if self.x_data.shape[1] > 10:
-            # PYLINT-TODO
-            # pylint: disable-next=broad-exception-raised
-            raise Exception(
-                "Dimensionality problem: This method is not available for problems with dimensionality > 10: the performance of the method degrades substantially at higher dimensions"
+            raise ValueError(
+                "Dimensionality problem: This method is not available for problems "
+                "with dimensionality > 10: the performance of the method degrades "
+                "substantially at higher dimensions"
             )
 
     def sample_points(self):
@@ -1218,14 +1192,16 @@ def __init__(
         if self.x_data.shape[1] > 10:
             # PYLINT-TODO
             # pylint: disable-next=broad-exception-raised
-            raise Exception(
-                "Dimensionality problem: This method is not available for problems with dimensionality > 10: the performance of the method degrades substantially at higher dimensions"
+            raise ValueError(
+                "Dimensionality problem: This method is not available for problems with "
+                "dimensionality > 10: the performance of the method degrades "
+                "substantially at higher dimensions"
             )
 
     def sample_points(self):
-
""" - The **sampling_type** method generates the Hammersley sample points. The steps followed here are: + """Generate the Hammersley sample points. + The steps followed here are: 1. Determine the number of features :math:`n_{f}` in the input data. 2. Generate the list of :math:`\\left(n_{f}-1\\right)` primes to be considered by calling prime_number_generator. 3. Divide the space [0,**number_of_samples**-1] into **number_of_samples** places to obtain the first dimension for the Hammersley sequence. @@ -1234,7 +1210,7 @@ def sample_points(self): 6. When in "selection" mode, determine the closest corresponding point in the input dataset using Euclidean distance minimization. This is done by calling the ``nearest_neighbours`` method in the sampling superclass. Returns: - NumPy Array or Pandas Dataframe: A numpy array or Pandas dataframe containing **number_of_samples** Hammersley sample points. + NumPy Array or Pandas Dataframe: A numpy array or Pandas dataframe containing **number_of_samples** Hammersley sample points. """ no_features = self.x_data.shape[1] @@ -1262,8 +1238,7 @@ def sample_points(self): class CVTSampling(SamplingMethods): - """ - A class that constructs Centroidal Voronoi Tessellation (CVT) samples. + """A class that constructs Centroidal Voronoi Tessellation (CVT) samples. CVT sampling is based on the generation of samples in which the generators of the Voronoi tessellations and the mass centroids coincide. @@ -1289,8 +1264,8 @@ def __init__( ylabels=None, rand_seed=None, ): - """ - Initialization of CVTSampling class. Two inputs are required, while an optional option to control the solution accuracy may be specified. + """Constructor. Two inputs are required, while an optional option to + control the solution accuracy may be specified. Args: data_input (NumPy Array, Pandas Dataframe or list): The input data set or range to be sampled. @@ -1309,9 +1284,6 @@ def __init__( - The smaller the value of tolerance, the better the solution but the longer the algorithm requires to converge. Default value is :math:`10^{-7}`. - Returns: - **self** function containing the input information. - Raises: ValueError: The input data (**data_input**) is the wrong type/dimension, or **number_of_samples** is invalid (too large, zero, or negative) @@ -1324,8 +1296,6 @@ def __init__( Exception: When the tolerance specified is invalid - warnings.warn: when the tolerance specified by the user is too tight (tolerance < :math:`10^{-9}`) - """ if sampling_type is None: sampling_type = "creation" @@ -1340,7 +1310,9 @@ def __init__( self.sampling_type = sampling_type else: raise ValueError( - 'Invalid sampling type requirement entered. Enter "creation" for sampling from a range or "selection" for selecting samples from a dataset.' + 'Invalid sampling type requirement entered. ' + 'Enter "creation" for sampling from a range or "selection" for ' + 'selecting samples from a dataset.' ) print("Sampling type: ", self.sampling_type, "\n") @@ -1422,7 +1394,7 @@ def __init__( elif tolerance > 0.1: raise ValueError("Tolerance must be less than 0.1 to achieve good results") elif tolerance < 1e-9: - warnings.warn( + _log.warning( "Tolerance too tight. CVT algorithm may take long time to converge." ) elif (tolerance < 0.1) and (tolerance > 1e-9): @@ -1789,8 +1761,9 @@ def generate_from_dist(self, dist_name): ) > 0 ): - warnings.warn( - "Points adjusted to remain within specified Gaussian bounds. This may affect the underlying distribution." 
+                _log.warning(
+                    "Points adjusted to remain within specified Gaussian bounds. "
+                    "This may affect the underlying distribution."
                 )
                 out_locations = [
                     i
diff --git a/idaes/core/surrogate/pysmo/tests/test_sampling.py b/idaes/core/surrogate/pysmo/tests/test_sampling.py
index c104ea8d6e..0894245458 100644
--- a/idaes/core/surrogate/pysmo/tests/test_sampling.py
+++ b/idaes/core/surrogate/pysmo/tests/test_sampling.py
@@ -28,6 +28,7 @@
     SamplingMethods,
     FeatureScaling,
 )
+from idaes.core.surrogate.pysmo.tests import logs_got_warning
 
 
 class TestFeatureScaling:
diff --git a/idaes/core/surrogate/pysmo_surrogate.py b/idaes/core/surrogate/pysmo_surrogate.py
index a66e98eeca..5ad99500de 100644
--- a/idaes/core/surrogate/pysmo_surrogate.py
+++ b/idaes/core/surrogate/pysmo_surrogate.py
@@ -10,13 +10,6 @@
 # All rights reserved. Please see the files COPYRIGHT.md and LICENSE.md
 # for full copyright and license information.
 #################################################################################
-# TODO: Missing doc strings
-# pylint: disable=missing-module-docstring
-# pylint: disable=missing-class-docstring
-# pylint: disable=missing-function-docstring
-
-# TODO: Look into protected access issues
-# pylint: disable=protected-access
 
 # stdlib
 import io
@@ -44,12 +37,11 @@
 
 __author__ = "Oluwamayowa Amusat"
 
+
 # Logging
-# -------
 _log = idaeslog.getLogger(__name__)
 
 # Global variables
-# ----------------
 GLOBAL_FUNCS = {"sin": sin, "cos": cos, "log": log, "exp": exp}
 
 
@@ -246,7 +238,7 @@ class PysmoPolyTrainer(PysmoTrainer):
     CONFIG.declare(
         "maximum_polynomial_order",
         ConfigValue(
-            default=None,
+            default=10,
             domain=PositiveInt,
             description="Maximum order of univariate terms. Maximum value is 10.",
         ),
@@ -271,7 +263,7 @@ class PysmoPolyTrainer(PysmoTrainer):
     CONFIG.declare(
         "solution_method",
         ConfigValue(
-            default=None,
+            default="pyomo",
             domain=In(["pyomo", "mle", "bfgs"]),
             description="Method for solving regression problem. Must be one of the options ['pyomo', 'mle', 'bfgs']. ",
         ),
@@ -280,7 +272,7 @@ class PysmoPolyTrainer(PysmoTrainer):
     CONFIG.declare(
         "multinomials",
         ConfigValue(
-            default=False,
+            default=True,
             domain=Bool,
             description="Option for bi-variate pairwise terms in final polynomial",
         ),
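
With the three defaults changed above, a polynomial trainer no longer needs these options passed explicitly. A rough usage sketch, assuming the standard SurrogateTrainer keyword arguments (`input_labels`, `output_labels`, `training_dataframe`); the tiny dataset is illustrative only, not part of the patch:

    import pandas as pd
    from idaes.core.surrogate.pysmo_surrogate import PysmoPolyTrainer

    # Illustrative training data: y = x1 + x2
    df = pd.DataFrame(
        {"x1": [1.0, 2.0, 3.0], "x2": [4.0, 5.0, 6.0], "y": [5.0, 7.0, 9.0]}
    )

    trainer = PysmoPolyTrainer(
        input_labels=["x1", "x2"],
        output_labels=["y"],
        training_dataframe=df,
    )

    # The declared defaults now apply without being set by the caller:
    assert trainer.config.maximum_polynomial_order == 10
    assert trainer.config.solution_method == "pyomo"
    assert trainer.config.multinomials is True
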