From 3362cd3c7c7931376b050e6ab9673b273ec3b91d Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Fri, 3 Feb 2023 17:21:46 -0800 Subject: [PATCH 01/19] Fix NumPy array creation error by specifying object type --- idaes/core/surrogate/pysmo/kriging.py | 2 +- .../surrogate/pysmo/polynomial_regression.py | 2 +- .../surrogate/pysmo/radial_basis_function.py | 2 +- .../core/surrogate/tests/test_pysmo_surrogate.py | 16 ++++++++++++++++ 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/idaes/core/surrogate/pysmo/kriging.py b/idaes/core/surrogate/pysmo/kriging.py index e6e0549de3..6849941303 100644 --- a/idaes/core/surrogate/pysmo/kriging.py +++ b/idaes/core/surrogate/pysmo/kriging.py @@ -655,7 +655,7 @@ def generate_expression(self, variable_list): Pyomo Expression : Pyomo expression of the Kriging model based on the variables provided in **variable_list** """ - t1 = np.array([variable_list]) + t1 = np.array([variable_list], dtype="object") # Reshaping of variable array is necessary when input variables are Pyomo scalar variables t1 = t1.reshape(1, len(variable_list)) if t1.ndim > 2 else t1 diff --git a/idaes/core/surrogate/pysmo/polynomial_regression.py b/idaes/core/surrogate/pysmo/polynomial_regression.py index 9a74ab8362..235a9c23d4 100644 --- a/idaes/core/surrogate/pysmo/polynomial_regression.py +++ b/idaes/core/surrogate/pysmo/polynomial_regression.py @@ -1602,7 +1602,7 @@ def generate_expression(self, variable_list): """ # Reshaping of array necessary when input variables are Pyomo scalar variables - vl = np.array([variable_list]) + vl = np.array([variable_list], dtype="object") vl = vl.reshape(1, len(variable_list)) if vl.ndim > 2 else vl terms = PolynomialRegression.polygeneration( diff --git a/idaes/core/surrogate/pysmo/radial_basis_function.py b/idaes/core/surrogate/pysmo/radial_basis_function.py index 2b81583626..f4826597e1 100644 --- a/idaes/core/surrogate/pysmo/radial_basis_function.py +++ 
b/idaes/core/surrogate/pysmo/radial_basis_function.py @@ -1163,7 +1163,7 @@ def generate_expression(self, variable_list): Pyomo Expression : Pyomo expression of the RBF model based on the variables provided in **variable_list** """ - t1 = np.array([variable_list]) + t1 = np.array([variable_list], dtype="object") # Reshaping of variable array is necessary when input variables are Pyomo scalar variables t1 = t1.reshape(1, len(variable_list)) if t1.ndim > 2 else t1 diff --git a/idaes/core/surrogate/tests/test_pysmo_surrogate.py b/idaes/core/surrogate/tests/test_pysmo_surrogate.py index aacaa04494..319f2be00b 100644 --- a/idaes/core/surrogate/tests/test_pysmo_surrogate.py +++ b/idaes/core/surrogate/tests/test_pysmo_surrogate.py @@ -1633,6 +1633,22 @@ def test_populate_block_multisurrogate_poly_userdef(self, pysmo_surr4): cstr = cstr.replace("inputs[x2]", "5") assert eval(cstr) == pytest.approx(0, abs=1e-8) + @pytest.mark.unit + def test_populate_block_multisurrogate_poly_userdef_mixedtypes(self, pysmo_surr1): + # Test ``populate_block`` for input variables of mixed types, e.g scalar and indexed pyomo variables + + m = ConcreteModel() + m.x1 = Var(initialize=0, bounds=(0, 5)) + m.x2 = Var([0], initialize=0, bounds=(0, 10)) + m.z1 = Var(initialize=0, bounds=(0, 10)) + m.surrogate = SurrogateBlock(concrete=True) + m.surrogate.build_model( + pysmo_surr1, input_vars=[m.x1, m.x2], output_vars=[m.z1] + ) + print(m) + assert len(m.surrogate.pysmo_constraint) == 1 + m.display() + @pytest.mark.parametrize( "confidence_dict", [{0.99: 3.2498355440153697}, {0.90: 1.8331129326536335}] ) From 2368de95e9b3218e386a895be07c47fa2ffc1897 Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Mon, 6 Feb 2023 08:25:26 -0800 Subject: [PATCH 02/19] Removing print and display statements --- idaes/core/surrogate/tests/test_pysmo_surrogate.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/idaes/core/surrogate/tests/test_pysmo_surrogate.py 
b/idaes/core/surrogate/tests/test_pysmo_surrogate.py index 319f2be00b..241fe48247 100644 --- a/idaes/core/surrogate/tests/test_pysmo_surrogate.py +++ b/idaes/core/surrogate/tests/test_pysmo_surrogate.py @@ -1645,9 +1645,7 @@ def test_populate_block_multisurrogate_poly_userdef_mixedtypes(self, pysmo_surr1 m.surrogate.build_model( pysmo_surr1, input_vars=[m.x1, m.x2], output_vars=[m.z1] ) - print(m) assert len(m.surrogate.pysmo_constraint) == 1 - m.display() @pytest.mark.parametrize( "confidence_dict", [{0.99: 3.2498355440153697}, {0.90: 1.8331129326536335}] From 7bb03104ca07d6700719245c056d906afa7ee18e Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Wed, 6 Dec 2023 16:04:19 -0800 Subject: [PATCH 03/19] Adding a function for custom sampling. - User can explicitly define a distribution for sampling of each variable. Sampling options currently available are random, uniform and Gaussian. --- idaes/core/surrogate/pysmo/sampling.py | 199 ++++++++++++++++++++++++- 1 file changed, 198 insertions(+), 1 deletion(-) diff --git a/idaes/core/surrogate/pysmo/sampling.py b/idaes/core/surrogate/pysmo/sampling.py index 739d4edfb1..8c72e9a7e0 100644 --- a/idaes/core/surrogate/pysmo/sampling.py +++ b/idaes/core/surrogate/pysmo/sampling.py @@ -1125,7 +1125,6 @@ def __init__( print("Sampling type: ", self.sampling_type, "\n") if self.sampling_type == "selection": - if isinstance(data_input, (pd.DataFrame, np.ndarray)): self.selection_columns_preprocessing(data_input, xlabels, ylabels) else: @@ -1536,3 +1535,201 @@ def sample_points(self): unique_sample_points, columns=self.data_headers ) return unique_sample_points + + +class CustomSampling(SamplingMethods): + """ + A class that performs custom sampling per dimension as specified by the user. The distribution to be applied per dimension must be specified by the user. + + - The distribution to be used per variable needs to be specified in a list. 
+ + To use: call class with inputs, and then ``sample_points`` function + + **Example:** + + .. code-block:: python + + # To select 50 samples on a (10 x 5) grid in a 2D space: + >>> b = rbf.UniformSampling(data, [10, 5], sampling_type="selection") + >>> samples = b.sample_points() + + """ + + def __init__( + self, + data_input, + number_of_samples=None, + list_of_distributions=None, + sampling_type=None, + xlabels=None, + ylabels=None, + ): + """ + Initialization of CustomSampling class. Three inputs are required. + + Args: + data_input (NumPy Array, Pandas Dataframe or list) : The input data set or range to be sampled. + + - When the aim is to select a set of samples from an existing dataset, the dataset must be a NumPy Array or a Pandas Dataframe and **sampling_type** option must be set to "selection". A single output variable (y) is assumed to be supplied in the last column if **xlabels** and **ylabels** are not supplied. + - When the aim is to generate a set of samples from a data range, the dataset must be a list containing two lists of equal lengths which contain the variable bounds and **sampling_type** option must be set to "creation". It is assumed that the range contains no output variable information in this case. + + number_of_samples(int): The number of samples to be generated. Should be a positive integer less than or equal to the number of entries (rows) in **data_input**. + list_of_distributions (list): The list containing the probability distribution for each variable. We currently support random, uniform and normal(i.e. Gaussian) distributions. + sampling_type (str) : Option which determines whether the algorithm selects samples from an existing dataset ("selection") or attempts to generate sample from a supplied range ("creation"). Default is "creation". + + Keyword Args: + xlabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the independent/input variables. 
Only used in "selection" mode. Default is None. + ylabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the dependent/output variables. Only used in "selection" mode. Default is None. + + Returns: + **self** function containing the input information + + Raises: + ValueError: The **data_input** is the wrong type + + ValueError: When a non-implemented distribution is supplied in list_of_distributions + + IndexError: When invalid column names are supplied in **xlabels** or **ylabels** + + Exception: When the **number_of_samples** is invalid (not an integer, too large, zero, negative) + + + """ + if sampling_type is None: + sampling_type = "creation" + self.sampling_type = sampling_type + print("Creation-type sampling will be used.") + elif not isinstance(sampling_type, str): + raise Exception("Invalid sampling type entry. Must be of type <str>.") + elif (sampling_type.lower() == "creation") or ( + sampling_type.lower() == "selection" + ): + sampling_type = sampling_type.lower() + self.sampling_type = sampling_type + else: + raise Exception( + 'Invalid sampling type requirement entered. Enter "creation" for sampling from a range or "selection" for selecting samples from a dataset.' + ) + print("Sampling type: ", self.sampling_type, "\n") + + if self.sampling_type == "selection": + if isinstance(data_input, (pd.DataFrame, np.ndarray)): + self.selection_columns_preprocessing(data_input, xlabels, ylabels) + else: + raise ValueError( + 'Pandas dataframe or numpy array required for sampling_type "selection."' + ) + + # Catch potential errors in number_of_samples + if number_of_samples is None: + print( + "\nNo entry for number of samples to be generated. The default value of 5 will be used."
+ ) + number_of_samples = 5 + elif number_of_samples > self.data.shape[0]: + raise Exception( + "LHS sample size cannot be greater than number of samples in the input data set" + ) + elif not isinstance(number_of_samples, int): + raise Exception("number_of_samples must be an integer.") + elif number_of_samples <= 0: + raise Exception("number_of_samples must a positive, non-zero integer.") + self.number_of_samples = number_of_samples + + elif self.sampling_type == "creation": + if not isinstance(data_input, list): + raise ValueError( + 'List entry of two elements expected for sampling_type "creation."' + ) + elif len(data_input) != 2: + raise Exception("data_input must contain two lists of equal lengths.") + elif not isinstance(data_input[0], list) or not isinstance( + data_input[1], list + ): + raise Exception("data_input must contain two lists of equal lengths.") + elif len(data_input[0]) != len(data_input[1]): + raise Exception("data_input must contain two lists of equal lengths.") + elif data_input[0] == data_input[1]: + raise Exception("Invalid entry: both lists are equal.") + else: + bounds_array = np.zeros( + ( + 2, + len(data_input[0]), + ) + ) + bounds_array[0, :] = np.array(data_input[0]) + bounds_array[1, :] = np.array(data_input[1]) + data_headers = [] + self.data = bounds_array + self.data_headers = data_headers + + # Catch potential errors in number_of_samples + if number_of_samples is None: + print( + "\nNo entry for number of samples to be generated. The default value of 5 will be used." 
+ ) + number_of_samples = 5 + elif not isinstance(number_of_samples, int): + raise Exception("number_of_samples must be an integer.") + elif number_of_samples <= 0: + raise Exception("number_of_samples must a positive, non-zero integer.") + self.number_of_samples = number_of_samples + self.x_data = bounds_array # Only x data will be present in this case + + # Check that list_of_distributions is a list, list length is correct and all list values are strings + if list_of_distributions is None: + raise ValueError("list_of_distributions cannot be empty.") + if not isinstance(list_of_distributions, list): + raise TypeError("list_of_distributions: list required.") + if len(list_of_distributions) != self.x_data.shape[1]: + raise ValueError( + "Length of list_of_distributions must equal the number of variables." + ) + if all(isinstance(q, str) for q in list_of_distributions) is False: + raise TypeError("All values in list must be strings") + if not all( + q.lower() in ["random", "normal", "uniform"] for q in list_of_distributions + ): + raise ValueError( + "list_of_distributions only supports 'random', 'gaussian' and 'uniform' sampling options." + ) + self.dist_vector = list_of_distributions + + def sample_points(self): + points_spread = [] + for i in self.dist_vector: + if i.lower() in ["uniform", "random"]: + dist = getattr(np.random.default_rng(), "uniform") + var_values = dist(size=self.number_of_samples) + points_spread.append(var_values) + elif i.lower() == "normal": + dist = getattr(np.random.default_rng(), "normal") + var_values = dist(loc=0.5, scale=1 / 6, size=self.number_of_samples) + if sum( + [1 for i in range(0, var_values.shape[0]) if var_values[i] > 1] + ) + sum( + [1 for i in range(0, var_values.shape[0]) if var_values[i] < 0] + ): + warnings.warn( + "Points adjusted to remain within specified Gaussian bounds." 
+ ) + var_values_truncated = np.array( + [1.0 if j > 1.0 else 0.0 if j < 0.0 else j for j in var_values] + ) + points_spread.append(var_values_truncated) + else: + raise ValueError( + "list_of_distributions only supports 'random', 'normal' and 'uniform' sampling options." + ) + samples_array = np.asarray(points_spread).T + + # Scale input data, then find data points closest in sample space. Unscale before returning points + unique_sample_points = self.sample_point_selection( + self.data, samples_array, self.sampling_type + ) + if len(self.data_headers) > 0 and self.df_flag: + unique_sample_points = pd.DataFrame( + unique_sample_points, columns=self.data_headers + ) + return unique_sample_points From 2eadf88f59ce664e1befb8cc369504c7936168e5 Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Thu, 7 Dec 2023 16:56:40 -0800 Subject: [PATCH 04/19] Improve errors and warnings --- idaes/core/surrogate/pysmo/sampling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/idaes/core/surrogate/pysmo/sampling.py b/idaes/core/surrogate/pysmo/sampling.py index 8c72e9a7e0..5ec304325b 100644 --- a/idaes/core/surrogate/pysmo/sampling.py +++ b/idaes/core/surrogate/pysmo/sampling.py @@ -1628,7 +1628,7 @@ def __init__( number_of_samples = 5 elif number_of_samples > self.data.shape[0]: raise Exception( - "LHS sample size cannot be greater than number of samples in the input data set" + "Sample size cannot be greater than number of samples in the input data set" ) elif not isinstance(number_of_samples, int): raise Exception("number_of_samples must be an integer.") @@ -1681,7 +1681,7 @@ def __init__( if list_of_distributions is None: raise ValueError("list_of_distributions cannot be empty.") if not isinstance(list_of_distributions, list): - raise TypeError("list_of_distributions: list required.") + raise TypeError("Error with list_of_distributions: list required.") if len(list_of_distributions) != self.x_data.shape[1]: raise 
ValueError( "Length of list_of_distributions must equal the number of variables." From 2a952a0ec63878dadae903d2aaa5d06ffe363a47 Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Thu, 7 Dec 2023 20:22:56 -0800 Subject: [PATCH 05/19] Tests for CustomSampling --- idaes/core/surrogate/pysmo/sampling.py | 50 +- .../surrogate/pysmo/tests/test_sampling.py | 453 ++++++++++++++++++ 2 files changed, 479 insertions(+), 24 deletions(-) diff --git a/idaes/core/surrogate/pysmo/sampling.py b/idaes/core/surrogate/pysmo/sampling.py index 5ec304325b..6a5894a060 100644 --- a/idaes/core/surrogate/pysmo/sampling.py +++ b/idaes/core/surrogate/pysmo/sampling.py @@ -1692,36 +1692,38 @@ def __init__( q.lower() in ["random", "normal", "uniform"] for q in list_of_distributions ): raise ValueError( - "list_of_distributions only supports 'random', 'gaussian' and 'uniform' sampling options." + "list_of_distributions only supports 'random', 'normal' and 'uniform' sampling options." ) self.dist_vector = list_of_distributions + + def generate_from_dist(self, dist_name): + if dist_name.lower() in ["uniform", "random"]: + dist = getattr(np.random.default_rng(), dist_name.lower()) + var_values = np.array(dist(size=self.number_of_samples)) + return dist, var_values + elif dist_name.lower() == "normal": + dist = getattr(np.random.default_rng(), "normal") + var_values = dist(loc=0.5, scale=1 / 6, size=self.number_of_samples) + if sum( + [1 for i in range(0, var_values.shape[0]) if var_values[i] > 1] + ) + sum( + [1 for i in range(0, var_values.shape[0]) if var_values[i] < 0] + ): + warnings.warn( + "Points adjusted to remain within specified Gaussian bounds." 
+ ) + var_values_truncated = np.array( + [1.0 if j > 1.0 else 0.0 if j < 0.0 else j for j in var_values] + ) + return dist, var_values_truncated + + def sample_points(self): points_spread = [] for i in self.dist_vector: - if i.lower() in ["uniform", "random"]: - dist = getattr(np.random.default_rng(), "uniform") - var_values = dist(size=self.number_of_samples) - points_spread.append(var_values) - elif i.lower() == "normal": - dist = getattr(np.random.default_rng(), "normal") - var_values = dist(loc=0.5, scale=1 / 6, size=self.number_of_samples) - if sum( - [1 for i in range(0, var_values.shape[0]) if var_values[i] > 1] - ) + sum( - [1 for i in range(0, var_values.shape[0]) if var_values[i] < 0] - ): - warnings.warn( - "Points adjusted to remain within specified Gaussian bounds." - ) - var_values_truncated = np.array( - [1.0 if j > 1.0 else 0.0 if j < 0.0 else j for j in var_values] - ) - points_spread.append(var_values_truncated) - else: - raise ValueError( - "list_of_distributions only supports 'random', 'normal' and 'uniform' sampling options." - ) + _, var_values = self.generate_from_dist(i) + points_spread.append(var_values) samples_array = np.asarray(points_spread).T # Scale input data, then find data points closest in sample space. 
Unscale before returning points diff --git a/idaes/core/surrogate/pysmo/tests/test_sampling.py b/idaes/core/surrogate/pysmo/tests/test_sampling.py index 6f27569e3d..bc1ccda5ed 100644 --- a/idaes/core/surrogate/pysmo/tests/test_sampling.py +++ b/idaes/core/surrogate/pysmo/tests/test_sampling.py @@ -24,6 +24,7 @@ HaltonSampling, HammersleySampling, CVTSampling, + CustomSampling, SamplingMethods, FeatureScaling, ) @@ -2240,5 +2241,457 @@ def test_sample_points_02(self, array_type): ) +class TestCustomSampling: + input_array = [[x, x + 10, (x + 1) ** 2 + x + 10] for x in range(10)] + input_array_list = [[x, x + 10, (x + 1) ** 2 + x + 10] for x in range(2)] + y = np.array( + [ + [i, j, ((i + 1) ** 2) + ((j + 1) ** 2)] + for i in np.linspace(0, 10, 21) + for j in np.linspace(0, 10, 21) + ] + ) + full_data = {"x1": y[:, 0], "x2": y[:, 1], "y": y[:, 2]} + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_01(self, array_type): + input_array = array_type(self.input_array) + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="selection", list_of_distributions=['uniform', 'normal'], + ) + np.testing.assert_array_equal(CSClass.data, input_array) + np.testing.assert_array_equal(CSClass.number_of_samples, 5) + np.testing.assert_array_equal(CSClass.x_data, np.array(input_array)[:, :-1]) + assert CSClass.dist_vector == ['uniform', 'normal'] + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_02(self, array_type): + input_array = array_type(self.input_array) + CSClass = CustomSampling( + input_array, number_of_samples=6, sampling_type="selection", list_of_distributions=['uniform', 'normal'], + ) + np.testing.assert_array_equal(CSClass.data, input_array) + np.testing.assert_array_equal(CSClass.number_of_samples, 6) + np.testing.assert_array_equal(CSClass.x_data, np.array(input_array)[:, :-1]) + assert CSClass.dist_vector == 
['uniform', 'normal'] + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_03(self, array_type): + input_array = array_type(self.input_array) + with pytest.raises(Exception): + CSClass = CustomSampling( + input_array, number_of_samples=0, sampling_type="selection", list_of_distributions=['uniform', 'normal'], + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_04(self, array_type): + input_array = array_type(self.input_array) + with pytest.raises(Exception): + CSClass = CustomSampling( + input_array, number_of_samples=-1, sampling_type="selection", list_of_distributions=['uniform', 'normal'], + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_05(self, array_type): + input_array = array_type(self.input_array) + with pytest.raises(Exception): + CSClass = CustomSampling( + input_array, number_of_samples=101, sampling_type="selection", list_of_distributions=['uniform', 'normal'], + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_06(self, array_type): + input_array = array_type(self.input_array) + with pytest.raises(Exception): + CSClass = CustomSampling( + input_array, number_of_samples=1.1, sampling_type="selection", list_of_distributions=['uniform', 'normal'], + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__selection_07(self, array_type): + input_array = array_type(self.input_array_list) + with pytest.raises(ValueError): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="selection", list_of_distributions=['uniform', 'normal'], + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__selection_08(self, array_type): + input_array = array_type(self.input_array_list) + with pytest.raises(ValueError): + 
CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="selection" + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_09(self, array_type): + input_array = array_type(self.input_array_list) + with pytest.raises(ValueError): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="selection", list_of_distributions=['uniform', 'normal', 'random'], + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_10(self, array_type): + input_array = array_type(self.input_array_list) + with pytest.raises(ValueError): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="selection", list_of_distributions=['uniform'], + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_11(self, array_type): + input_array = array_type(self.input_array_list) + with pytest.raises(ValueError): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="selection", list_of_distributions=None, + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_12(self, array_type): + input_array = array_type(self.input_array_list) + with pytest.raises(TypeError): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="selection", list_of_distributions=('uniform', 'normal'), + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_13(self, array_type): + input_array = array_type(self.input_array_list) + with pytest.raises(TypeError): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="selection", list_of_distributions=['uniform', 1.0] + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def 
test__init__selection_14(self, array_type): + input_array = array_type(self.input_array_list) + with pytest.raises(ValueError): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="selection", list_of_distributions=['uniform', 'binomial'] + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_01(self, array_type): + input_array = array_type(self.input_array_list) + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type=None, list_of_distributions=['uniform', 'normal', 'random'], + ) + np.testing.assert_array_equal(CSClass.data, input_array) + np.testing.assert_array_equal(CSClass.number_of_samples, 5) + assert CSClass.dist_vector == ['uniform', 'normal', 'random'] + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_02(self, array_type): + input_array = array_type(self.input_array_list) + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + ) + np.testing.assert_array_equal(CSClass.data, input_array) + np.testing.assert_array_equal(CSClass.number_of_samples, 5) + assert CSClass.dist_vector == ['uniform', 'normal', 'random'] + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_03(self, array_type): + input_array = array_type(self.input_array_list) + CSClass = CustomSampling( + input_array, number_of_samples=100, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + ) + np.testing.assert_array_equal(CSClass.data, input_array) + np.testing.assert_array_equal(CSClass.number_of_samples, 100) + assert CSClass.dist_vector == ['uniform', 'normal', 'random'] + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_04(self, array_type): + input_array = array_type(self.input_array_list) + with pytest.raises(Exception): + CSClass = 
CustomSampling( + input_array, number_of_samples=0, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_05(self, array_type): + input_array = array_type(self.input_array_list) + with pytest.raises(Exception): + CSClass = CustomSampling( + input_array, number_of_samples=-1, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_06(self, array_type): + input_array = array_type(self.input_array_list) + with pytest.raises(Exception): + CSClass = CustomSampling( + input_array, number_of_samples=1.1, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__creation_07(self, array_type): + input_array = array_type(self.input_array) + with pytest.raises(ValueError): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [pd.DataFrame]) + def test__init__creation_08(self, array_type): + input_array = array_type(self.input_array) + with pytest.raises(ValueError): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + ) + @pytest.mark.unit + def test__init__creation_09(self): + input_array = [[2, 11, 4.5]] + with pytest.raises(Exception): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + ) + @pytest.mark.unit + def test__init__creation_10(self): + input_array = [np.array([1, 10, 3]), [2, 11, 4.5]] + with pytest.raises(Exception): + CSClass = 
CustomSampling( + input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + ) + @pytest.mark.unit + def test__init__creation_11(self): + input_array = [[1, 10, 3], np.array([2, 11, 4.5])] + with pytest.raises(Exception): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + ) + @pytest.mark.unit + def test__init__creation_12(self): + input_array = [[1, 10], [2, 11, 4.5]] + with pytest.raises(Exception): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + ) + @pytest.mark.unit + def test__init__creation_13(self): + input_array = [[2, 11, 4.5], [2, 11, 4.5]] + with pytest.raises(Exception): + csClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_14(self, array_type): + input_array = array_type(self.input_array_list) + with pytest.raises(ValueError): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'normal'], + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_15(self, array_type): + input_array = array_type(self.input_array_list) + with pytest.raises(ValueError): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'uniform', 'normal', 'random'], + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__selection_16(self, array_type): + input_array = array_type(self.input_array_list) + with pytest.raises(ValueError): + CSClass = CustomSampling( + input_array, number_of_samples=None, 
sampling_type="creation", list_of_distributions=None, + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__selection_17(self, array_type): + input_array = array_type(self.input_array_list) + with pytest.raises(TypeError): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=('uniform', 'normal', 'random'), + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__selection_18(self, array_type): + input_array = array_type(self.input_array_list) + with pytest.raises(TypeError): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 1.0, 'normal'] + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__selection_19(self, array_type): + input_array = array_type(self.input_array_list) + with pytest.raises(ValueError): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'gaussian', 'normal'] + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__creation_selection_01(self, array_type): + input_array = array_type(self.input_array) + with pytest.raises(Exception): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type=1, list_of_distributions=['uniform', 'normal'], + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__creation_selection_02(self, array_type): + input_array = array_type(self.input_array) + with pytest.raises(Exception): + CSClass = CustomSampling( + input_array, number_of_samples=None, sampling_type="jp", list_of_distributions=['uniform', 'normal'], + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array]) + def test_generate_from_dist_01(self, array_type): + for num_samples in [None, 10, 1]: + 
input_array = array_type(self.input_array) + CSClass = CustomSampling( + input_array, number_of_samples=num_samples, sampling_type="selection", list_of_distributions=['uniform', 'normal'] + ) + dist_type = 'uniform' + dist_res, scaled_samples = CSClass.generate_from_dist(dist_type) + assert type(scaled_samples) == np.ndarray + assert scaled_samples.shape == (CSClass.number_of_samples, ) + assert dist_res.__name__ == dist_type + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array]) + def test_generate_from_dist_02(self, array_type): + for num_samples in [None, 10, 1]: + input_array = array_type(self.input_array) + CSClass = CustomSampling( + input_array, number_of_samples=num_samples, sampling_type="selection", + list_of_distributions=['uniform', 'normal'] + ) + dist_type = 'normal' + dist_res, scaled_samples = CSClass.generate_from_dist(dist_type) + assert type(scaled_samples) == np.ndarray + assert scaled_samples.shape == (CSClass.number_of_samples, ) + assert dist_res.__name__ == dist_type + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array]) + def test_generate_from_dist_03(self, array_type): + for num_samples in [None, 10, 1]: + input_array = array_type(self.input_array) + CSClass = CustomSampling( + input_array, number_of_samples=num_samples, sampling_type="selection", + list_of_distributions=['uniform', 'normal'] + ) + dist_type = 'random' + dist_res, scaled_samples = CSClass.generate_from_dist(dist_type) + assert type(scaled_samples) == np.ndarray + assert scaled_samples.shape == (CSClass.number_of_samples, ) + assert dist_res.__name__ == dist_type + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array]) + def test_generate_from_dist_04(self, array_type): + for num_samples in [None, 10, 1]: + input_array = array_type(self.input_array) + CSClass = CustomSampling( + input_array, number_of_samples=num_samples, sampling_type="selection", + list_of_distributions=['uniform', 'normal'] + ) + for dist_type in 
['uniform', 'random', 'uniform']: + dist_res, scaled_samples = CSClass.generate_from_dist(dist_type) + assert type(scaled_samples) == np.ndarray + assert scaled_samples.shape == (CSClass.number_of_samples,) + assert dist_res.__name__ == dist_type + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array]) + def test_sample_points_01(self, array_type): + for num_samples in [None, 10, 1]: + input_array = array_type(self.input_array) + CSClass = CustomSampling( + input_array, number_of_samples=num_samples, sampling_type="selection", list_of_distributions=['random', 'normal'] + ) + unique_sample_points = CSClass.sample_points() + expected_testing = np.array( + [True] * unique_sample_points.shape[0], dtype=bool + ) + out_testing = [ + unique_sample_points[i, :] in input_array + for i in range(unique_sample_points.shape[0]) + ] + np.testing.assert_array_equal( + np.unique(unique_sample_points, axis=0), unique_sample_points + ) + np.testing.assert_array_equal(expected_testing, out_testing) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test_sample_points_02(self, array_type): + for num_samples in [None, 10, 1]: + input_array = array_type(self.input_array_list) + CSClass = CustomSampling( + input_array, number_of_samples=num_samples, sampling_type="creation", list_of_distributions=['random', 'normal', 'uniform'] + ) + unique_sample_points = CSClass.sample_points() + input_array = np.array(input_array) + for i in range(input_array.shape[1]): + var_range = input_array[:, i] + assert (unique_sample_points[:, i] >= var_range[0]).all() and ( + unique_sample_points[:, i] <= var_range[1] + ).all() + np.testing.assert_array_equal( + np.unique(unique_sample_points, axis=0).shape, + unique_sample_points.shape, + ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [pd.DataFrame]) + def test_sample_points_03(self, array_type): + for num_samples in [None, 10, 1]: + input_array = array_type(self.full_data) + CSClass = CustomSampling( + 
input_array, number_of_samples=num_samples, sampling_type="selection", list_of_distributions=['random', 'normal'] + ) + unique_sample_points = CSClass.sample_points() + expected_testing = np.array( + [True] * unique_sample_points.shape[0], dtype=bool + ) + unique_sample_points = np.array(unique_sample_points) + out_testing = [ + unique_sample_points[i, :] in np.array(input_array) + for i in range(unique_sample_points.shape[0]) + ] + np.testing.assert_array_equal( + np.unique(unique_sample_points, axis=0), unique_sample_points + ) + np.testing.assert_array_equal(expected_testing, out_testing) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test_sample_points_04(self, array_type): + for num_samples in [None, 10, 1]: + input_array = array_type(self.input_array_list) + CSClass = CustomSampling( + input_array, number_of_samples=num_samples, sampling_type="creation", list_of_distributions=['random', 'normal', 'uniform'] + ) + unique_sample_points = CSClass.sample_points() + assert len(CSClass.dist_vector) == len(input_array[0]) + assert unique_sample_points.shape[0] == CSClass.number_of_samples + assert unique_sample_points.shape[1] == len(input_array[0]) + assert type(unique_sample_points) == np.ndarray + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [pd.DataFrame]) + def test_sample_points_05(self, array_type): + for num_samples in [None, 10, 1]: + input_array = array_type(self.full_data) + CSClass = CustomSampling( + input_array, number_of_samples=num_samples, sampling_type="selection", list_of_distributions=['random', 'normal'] + ) + unique_sample_points = CSClass.sample_points() + assert len(CSClass.dist_vector) == input_array.shape[1] - 1 + assert unique_sample_points.shape[1] == input_array.shape[1] + assert type(unique_sample_points) == pd.DataFrame + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array]) + def test_sample_points_06(self, array_type): + for num_samples in [None, 10, 1]: + input_array = 
array_type(self.input_array) + CSClass = CustomSampling( + input_array, number_of_samples=num_samples, sampling_type="selection", list_of_distributions=['random', 'uniform'] + ) + unique_sample_points = CSClass.sample_points() + assert len(CSClass.dist_vector) == input_array.shape[1] - 1 + assert unique_sample_points.shape[1] == input_array.shape[1] + assert type(unique_sample_points) == np.ndarray + + if __name__ == "__main__": pytest.main() From 1efb59ceefd08e3f31e55c1f6631dbfea1a905c3 Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Thu, 7 Dec 2023 21:37:44 -0800 Subject: [PATCH 06/19] running black... --- idaes/core/surrogate/pysmo/sampling.py | 6 +- .../surrogate/pysmo/tests/test_sampling.py | 292 ++++++++++++++---- 2 files changed, 234 insertions(+), 64 deletions(-) diff --git a/idaes/core/surrogate/pysmo/sampling.py b/idaes/core/surrogate/pysmo/sampling.py index 6a5894a060..703ba60680 100644 --- a/idaes/core/surrogate/pysmo/sampling.py +++ b/idaes/core/surrogate/pysmo/sampling.py @@ -1696,7 +1696,6 @@ def __init__( ) self.dist_vector = list_of_distributions - def generate_from_dist(self, dist_name): if dist_name.lower() in ["uniform", "random"]: dist = getattr(np.random.default_rng(), dist_name.lower()) @@ -1707,9 +1706,7 @@ def generate_from_dist(self, dist_name): var_values = dist(loc=0.5, scale=1 / 6, size=self.number_of_samples) if sum( [1 for i in range(0, var_values.shape[0]) if var_values[i] > 1] - ) + sum( - [1 for i in range(0, var_values.shape[0]) if var_values[i] < 0] - ): + ) + sum([1 for i in range(0, var_values.shape[0]) if var_values[i] < 0]): warnings.warn( "Points adjusted to remain within specified Gaussian bounds." 
) @@ -1718,7 +1715,6 @@ def generate_from_dist(self, dist_name): ) return dist, var_values_truncated - def sample_points(self): points_spread = [] for i in self.dist_vector: diff --git a/idaes/core/surrogate/pysmo/tests/test_sampling.py b/idaes/core/surrogate/pysmo/tests/test_sampling.py index bc1ccda5ed..0dbd01d676 100644 --- a/idaes/core/surrogate/pysmo/tests/test_sampling.py +++ b/idaes/core/surrogate/pysmo/tests/test_sampling.py @@ -2252,68 +2252,97 @@ class TestCustomSampling: ] ) full_data = {"x1": y[:, 0], "x2": y[:, 1], "y": y[:, 2]} + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_01(self, array_type): input_array = array_type(self.input_array) CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="selection", list_of_distributions=['uniform', 'normal'], + input_array, + number_of_samples=None, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], ) np.testing.assert_array_equal(CSClass.data, input_array) np.testing.assert_array_equal(CSClass.number_of_samples, 5) np.testing.assert_array_equal(CSClass.x_data, np.array(input_array)[:, :-1]) - assert CSClass.dist_vector == ['uniform', 'normal'] + assert CSClass.dist_vector == ["uniform", "normal"] + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_02(self, array_type): input_array = array_type(self.input_array) CSClass = CustomSampling( - input_array, number_of_samples=6, sampling_type="selection", list_of_distributions=['uniform', 'normal'], + input_array, + number_of_samples=6, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], ) np.testing.assert_array_equal(CSClass.data, input_array) np.testing.assert_array_equal(CSClass.number_of_samples, 6) np.testing.assert_array_equal(CSClass.x_data, np.array(input_array)[:, :-1]) - assert CSClass.dist_vector == ['uniform', 'normal'] + assert CSClass.dist_vector == ["uniform", 
"normal"] + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_03(self, array_type): input_array = array_type(self.input_array) with pytest.raises(Exception): CSClass = CustomSampling( - input_array, number_of_samples=0, sampling_type="selection", list_of_distributions=['uniform', 'normal'], + input_array, + number_of_samples=0, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_04(self, array_type): input_array = array_type(self.input_array) with pytest.raises(Exception): CSClass = CustomSampling( - input_array, number_of_samples=-1, sampling_type="selection", list_of_distributions=['uniform', 'normal'], + input_array, + number_of_samples=-1, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_05(self, array_type): input_array = array_type(self.input_array) with pytest.raises(Exception): CSClass = CustomSampling( - input_array, number_of_samples=101, sampling_type="selection", list_of_distributions=['uniform', 'normal'], + input_array, + number_of_samples=101, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_06(self, array_type): input_array = array_type(self.input_array) with pytest.raises(Exception): CSClass = CustomSampling( - input_array, number_of_samples=1.1, sampling_type="selection", list_of_distributions=['uniform', 'normal'], + input_array, + number_of_samples=1.1, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__selection_07(self, array_type): input_array = 
array_type(self.input_array_list) with pytest.raises(ValueError): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="selection", list_of_distributions=['uniform', 'normal'], + input_array, + number_of_samples=None, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__selection_08(self, array_type): @@ -2322,285 +2351,410 @@ def test__init__selection_08(self, array_type): CSClass = CustomSampling( input_array, number_of_samples=None, sampling_type="selection" ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_09(self, array_type): input_array = array_type(self.input_array_list) with pytest.raises(ValueError): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="selection", list_of_distributions=['uniform', 'normal', 'random'], + input_array, + number_of_samples=None, + sampling_type="selection", + list_of_distributions=["uniform", "normal", "random"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_10(self, array_type): input_array = array_type(self.input_array_list) with pytest.raises(ValueError): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="selection", list_of_distributions=['uniform'], + input_array, + number_of_samples=None, + sampling_type="selection", + list_of_distributions=["uniform"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_11(self, array_type): input_array = array_type(self.input_array_list) with pytest.raises(ValueError): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="selection", list_of_distributions=None, + input_array, + number_of_samples=None, + sampling_type="selection", + list_of_distributions=None, ) + @pytest.mark.unit 
@pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_12(self, array_type): input_array = array_type(self.input_array_list) with pytest.raises(TypeError): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="selection", list_of_distributions=('uniform', 'normal'), + input_array, + number_of_samples=None, + sampling_type="selection", + list_of_distributions=("uniform", "normal"), ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_13(self, array_type): input_array = array_type(self.input_array_list) with pytest.raises(TypeError): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="selection", list_of_distributions=['uniform', 1.0] + input_array, + number_of_samples=None, + sampling_type="selection", + list_of_distributions=["uniform", 1.0], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_14(self, array_type): input_array = array_type(self.input_array_list) with pytest.raises(ValueError): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="selection", list_of_distributions=['uniform', 'binomial'] + input_array, + number_of_samples=None, + sampling_type="selection", + list_of_distributions=["uniform", "binomial"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_01(self, array_type): input_array = array_type(self.input_array_list) CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type=None, list_of_distributions=['uniform', 'normal', 'random'], + input_array, + number_of_samples=None, + sampling_type=None, + list_of_distributions=["uniform", "normal", "random"], ) np.testing.assert_array_equal(CSClass.data, input_array) np.testing.assert_array_equal(CSClass.number_of_samples, 5) - assert CSClass.dist_vector == ['uniform', 'normal', 'random'] + assert 
CSClass.dist_vector == ["uniform", "normal", "random"] + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_02(self, array_type): input_array = array_type(self.input_array_list) CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + input_array, + number_of_samples=None, + sampling_type="creation", + list_of_distributions=["uniform", "normal", "random"], ) np.testing.assert_array_equal(CSClass.data, input_array) np.testing.assert_array_equal(CSClass.number_of_samples, 5) - assert CSClass.dist_vector == ['uniform', 'normal', 'random'] + assert CSClass.dist_vector == ["uniform", "normal", "random"] + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_03(self, array_type): input_array = array_type(self.input_array_list) CSClass = CustomSampling( - input_array, number_of_samples=100, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + input_array, + number_of_samples=100, + sampling_type="creation", + list_of_distributions=["uniform", "normal", "random"], ) np.testing.assert_array_equal(CSClass.data, input_array) np.testing.assert_array_equal(CSClass.number_of_samples, 100) - assert CSClass.dist_vector == ['uniform', 'normal', 'random'] + assert CSClass.dist_vector == ["uniform", "normal", "random"] + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_04(self, array_type): input_array = array_type(self.input_array_list) with pytest.raises(Exception): CSClass = CustomSampling( - input_array, number_of_samples=0, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + input_array, + number_of_samples=0, + sampling_type="creation", + list_of_distributions=["uniform", "normal", "random"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_05(self, array_type): 
input_array = array_type(self.input_array_list) with pytest.raises(Exception): CSClass = CustomSampling( - input_array, number_of_samples=-1, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + input_array, + number_of_samples=-1, + sampling_type="creation", + list_of_distributions=["uniform", "normal", "random"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_06(self, array_type): input_array = array_type(self.input_array_list) with pytest.raises(Exception): CSClass = CustomSampling( - input_array, number_of_samples=1.1, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + input_array, + number_of_samples=1.1, + sampling_type="creation", + list_of_distributions=["uniform", "normal", "random"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__creation_07(self, array_type): input_array = array_type(self.input_array) with pytest.raises(ValueError): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + input_array, + number_of_samples=None, + sampling_type="creation", + list_of_distributions=["uniform", "normal", "random"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [pd.DataFrame]) def test__init__creation_08(self, array_type): input_array = array_type(self.input_array) with pytest.raises(ValueError): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + input_array, + number_of_samples=None, + sampling_type="creation", + list_of_distributions=["uniform", "normal", "random"], ) + @pytest.mark.unit def test__init__creation_09(self): input_array = [[2, 11, 4.5]] with pytest.raises(Exception): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="creation", 
list_of_distributions=['uniform', 'normal', 'random'], + input_array, + number_of_samples=None, + sampling_type="creation", + list_of_distributions=["uniform", "normal", "random"], ) + @pytest.mark.unit def test__init__creation_10(self): input_array = [np.array([1, 10, 3]), [2, 11, 4.5]] with pytest.raises(Exception): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + input_array, + number_of_samples=None, + sampling_type="creation", + list_of_distributions=["uniform", "normal", "random"], ) + @pytest.mark.unit def test__init__creation_11(self): input_array = [[1, 10, 3], np.array([2, 11, 4.5])] with pytest.raises(Exception): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + input_array, + number_of_samples=None, + sampling_type="creation", + list_of_distributions=["uniform", "normal", "random"], ) + @pytest.mark.unit def test__init__creation_12(self): input_array = [[1, 10], [2, 11, 4.5]] with pytest.raises(Exception): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + input_array, + number_of_samples=None, + sampling_type="creation", + list_of_distributions=["uniform", "normal", "random"], ) + @pytest.mark.unit def test__init__creation_13(self): input_array = [[2, 11, 4.5], [2, 11, 4.5]] with pytest.raises(Exception): csClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'normal', 'random'], + input_array, + number_of_samples=None, + sampling_type="creation", + list_of_distributions=["uniform", "normal", "random"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_14(self, array_type): input_array = array_type(self.input_array_list) with pytest.raises(ValueError): 
CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'normal'], + input_array, + number_of_samples=None, + sampling_type="creation", + list_of_distributions=["uniform", "normal"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_15(self, array_type): input_array = array_type(self.input_array_list) with pytest.raises(ValueError): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'uniform', 'normal', 'random'], + input_array, + number_of_samples=None, + sampling_type="creation", + list_of_distributions=["uniform", "uniform", "normal", "random"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__selection_16(self, array_type): input_array = array_type(self.input_array_list) with pytest.raises(ValueError): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=None, + input_array, + number_of_samples=None, + sampling_type="creation", + list_of_distributions=None, ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__selection_17(self, array_type): input_array = array_type(self.input_array_list) with pytest.raises(TypeError): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=('uniform', 'normal', 'random'), + input_array, + number_of_samples=None, + sampling_type="creation", + list_of_distributions=("uniform", "normal", "random"), ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__selection_18(self, array_type): input_array = array_type(self.input_array_list) with pytest.raises(TypeError): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 1.0, 'normal'] + input_array, + 
number_of_samples=None, + sampling_type="creation", + list_of_distributions=["uniform", 1.0, "normal"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__selection_19(self, array_type): input_array = array_type(self.input_array_list) with pytest.raises(ValueError): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="creation", list_of_distributions=['uniform', 'gaussian', 'normal'] + input_array, + number_of_samples=None, + sampling_type="creation", + list_of_distributions=["uniform", "gaussian", "normal"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__creation_selection_01(self, array_type): input_array = array_type(self.input_array) with pytest.raises(Exception): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type=1, list_of_distributions=['uniform', 'normal'], + input_array, + number_of_samples=None, + sampling_type=1, + list_of_distributions=["uniform", "normal"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__creation_selection_02(self, array_type): input_array = array_type(self.input_array) with pytest.raises(Exception): CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="jp", list_of_distributions=['uniform', 'normal'], + input_array, + number_of_samples=None, + sampling_type="jp", + list_of_distributions=["uniform", "normal"], ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) def test_generate_from_dist_01(self, array_type): for num_samples in [None, 10, 1]: input_array = array_type(self.input_array) CSClass = CustomSampling( - input_array, number_of_samples=num_samples, sampling_type="selection", list_of_distributions=['uniform', 'normal'] + input_array, + number_of_samples=num_samples, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], ) - dist_type = 'uniform' + dist_type = 
"uniform" dist_res, scaled_samples = CSClass.generate_from_dist(dist_type) assert type(scaled_samples) == np.ndarray - assert scaled_samples.shape == (CSClass.number_of_samples, ) + assert scaled_samples.shape == (CSClass.number_of_samples,) assert dist_res.__name__ == dist_type + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) def test_generate_from_dist_02(self, array_type): for num_samples in [None, 10, 1]: input_array = array_type(self.input_array) CSClass = CustomSampling( - input_array, number_of_samples=num_samples, sampling_type="selection", - list_of_distributions=['uniform', 'normal'] + input_array, + number_of_samples=num_samples, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], ) - dist_type = 'normal' + dist_type = "normal" dist_res, scaled_samples = CSClass.generate_from_dist(dist_type) assert type(scaled_samples) == np.ndarray - assert scaled_samples.shape == (CSClass.number_of_samples, ) + assert scaled_samples.shape == (CSClass.number_of_samples,) assert dist_res.__name__ == dist_type + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) def test_generate_from_dist_03(self, array_type): for num_samples in [None, 10, 1]: input_array = array_type(self.input_array) CSClass = CustomSampling( - input_array, number_of_samples=num_samples, sampling_type="selection", - list_of_distributions=['uniform', 'normal'] + input_array, + number_of_samples=num_samples, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], ) - dist_type = 'random' + dist_type = "random" dist_res, scaled_samples = CSClass.generate_from_dist(dist_type) assert type(scaled_samples) == np.ndarray - assert scaled_samples.shape == (CSClass.number_of_samples, ) + assert scaled_samples.shape == (CSClass.number_of_samples,) assert dist_res.__name__ == dist_type + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) def test_generate_from_dist_04(self, array_type): for num_samples in [None, 
10, 1]: input_array = array_type(self.input_array) CSClass = CustomSampling( - input_array, number_of_samples=num_samples, sampling_type="selection", - list_of_distributions=['uniform', 'normal'] + input_array, + number_of_samples=num_samples, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], ) - for dist_type in ['uniform', 'random', 'uniform']: + for dist_type in ["uniform", "random", "uniform"]: dist_res, scaled_samples = CSClass.generate_from_dist(dist_type) assert type(scaled_samples) == np.ndarray assert scaled_samples.shape == (CSClass.number_of_samples,) assert dist_res.__name__ == dist_type + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) def test_sample_points_01(self, array_type): for num_samples in [None, 10, 1]: input_array = array_type(self.input_array) CSClass = CustomSampling( - input_array, number_of_samples=num_samples, sampling_type="selection", list_of_distributions=['random', 'normal'] + input_array, + number_of_samples=num_samples, + sampling_type="selection", + list_of_distributions=["random", "normal"], ) unique_sample_points = CSClass.sample_points() expected_testing = np.array( @@ -2614,13 +2768,17 @@ def test_sample_points_01(self, array_type): np.unique(unique_sample_points, axis=0), unique_sample_points ) np.testing.assert_array_equal(expected_testing, out_testing) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test_sample_points_02(self, array_type): for num_samples in [None, 10, 1]: input_array = array_type(self.input_array_list) CSClass = CustomSampling( - input_array, number_of_samples=num_samples, sampling_type="creation", list_of_distributions=['random', 'normal', 'uniform'] + input_array, + number_of_samples=num_samples, + sampling_type="creation", + list_of_distributions=["random", "normal", "uniform"], ) unique_sample_points = CSClass.sample_points() input_array = np.array(input_array) @@ -2633,13 +2791,17 @@ def test_sample_points_02(self, array_type): 
np.unique(unique_sample_points, axis=0).shape, unique_sample_points.shape, ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [pd.DataFrame]) def test_sample_points_03(self, array_type): for num_samples in [None, 10, 1]: input_array = array_type(self.full_data) CSClass = CustomSampling( - input_array, number_of_samples=num_samples, sampling_type="selection", list_of_distributions=['random', 'normal'] + input_array, + number_of_samples=num_samples, + sampling_type="selection", + list_of_distributions=["random", "normal"], ) unique_sample_points = CSClass.sample_points() expected_testing = np.array( @@ -2654,38 +2816,50 @@ def test_sample_points_03(self, array_type): np.unique(unique_sample_points, axis=0), unique_sample_points ) np.testing.assert_array_equal(expected_testing, out_testing) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test_sample_points_04(self, array_type): for num_samples in [None, 10, 1]: input_array = array_type(self.input_array_list) CSClass = CustomSampling( - input_array, number_of_samples=num_samples, sampling_type="creation", list_of_distributions=['random', 'normal', 'uniform'] + input_array, + number_of_samples=num_samples, + sampling_type="creation", + list_of_distributions=["random", "normal", "uniform"], ) unique_sample_points = CSClass.sample_points() assert len(CSClass.dist_vector) == len(input_array[0]) assert unique_sample_points.shape[0] == CSClass.number_of_samples assert unique_sample_points.shape[1] == len(input_array[0]) assert type(unique_sample_points) == np.ndarray + @pytest.mark.unit @pytest.mark.parametrize("array_type", [pd.DataFrame]) def test_sample_points_05(self, array_type): for num_samples in [None, 10, 1]: input_array = array_type(self.full_data) CSClass = CustomSampling( - input_array, number_of_samples=num_samples, sampling_type="selection", list_of_distributions=['random', 'normal'] + input_array, + number_of_samples=num_samples, + sampling_type="selection", + 
list_of_distributions=["random", "normal"], ) unique_sample_points = CSClass.sample_points() assert len(CSClass.dist_vector) == input_array.shape[1] - 1 assert unique_sample_points.shape[1] == input_array.shape[1] assert type(unique_sample_points) == pd.DataFrame + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) def test_sample_points_06(self, array_type): for num_samples in [None, 10, 1]: input_array = array_type(self.input_array) CSClass = CustomSampling( - input_array, number_of_samples=num_samples, sampling_type="selection", list_of_distributions=['random', 'uniform'] + input_array, + number_of_samples=num_samples, + sampling_type="selection", + list_of_distributions=["random", "uniform"], ) unique_sample_points = CSClass.sample_points() assert len(CSClass.dist_vector) == input_array.shape[1] - 1 From 741865a29dccdd380261e36da291df35ed04a162 Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Fri, 8 Dec 2023 10:52:34 -0800 Subject: [PATCH 07/19] Updating docs and example. 
--- .../surrogate/sampling/index.rst | 1 + .../surrogate/sampling/pysmo_custom.rst | 22 +++++++++++++++++++ idaes/core/surrogate/pysmo/sampling.py | 2 +- 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 docs/explanations/modeling_extensions/surrogate/sampling/pysmo_custom.rst diff --git a/docs/explanations/modeling_extensions/surrogate/sampling/index.rst b/docs/explanations/modeling_extensions/surrogate/sampling/index.rst index 0dfd301e87..db12c77c76 100644 --- a/docs/explanations/modeling_extensions/surrogate/sampling/index.rst +++ b/docs/explanations/modeling_extensions/surrogate/sampling/index.rst @@ -15,6 +15,7 @@ The PySMO package offers five common sampling methods for one-shot design: pysmo_halton pysmo_hammersley pysmo_cvt + pysmo_custom pysmo_sampling_properties diff --git a/docs/explanations/modeling_extensions/surrogate/sampling/pysmo_custom.rst b/docs/explanations/modeling_extensions/surrogate/sampling/pysmo_custom.rst new file mode 100644 index 0000000000..d0cfef2ef4 --- /dev/null +++ b/docs/explanations/modeling_extensions/surrogate/sampling/pysmo_custom.rst @@ -0,0 +1,22 @@ +Custom Sampling (LHS) +=========================================== +With this method, users can explicitly define the distribution for the sampling of each input variable. + +The ``pysmo.sampling.CustomSampling`` method carries out the user-defined sampling strategy. This can be done in two modes: + +* The samples can be selected from a user-provided dataset, or +* The samples can be generated from a set of provided bounds. + +We currently support three distribution options for sampling: + +* "random", for sampling from a random distribution +* "uniform", for sampling from a uniform distribution +* "normal", for sampling from a Gaussian distribution + +Available Methods +------------------ + +.. 
autoclass:: idaes.core.surrogate.pysmo.sampling.CustomSampling + :members: __init__, sample_points + + diff --git a/idaes/core/surrogate/pysmo/sampling.py b/idaes/core/surrogate/pysmo/sampling.py index 703ba60680..3c04f904d7 100644 --- a/idaes/core/surrogate/pysmo/sampling.py +++ b/idaes/core/surrogate/pysmo/sampling.py @@ -1550,7 +1550,7 @@ class CustomSampling(SamplingMethods): .. code-block:: python # To select 50 samples on a (10 x 5) grid in a 2D space: - >>> b = rbf.UniformSampling(data, [10, 5], sampling_type="selection") + >>> b = rbf.CustomSampling(data, [10, 5], list_of_distributions= ['normal', 'uniform'], sampling_type="selection") >>> samples = b.sample_points() """ From cdde009f6ab9f926a1dc77e627bd0d7857781934 Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Fri, 8 Dec 2023 11:03:18 -0800 Subject: [PATCH 08/19] Fix docs --- .../modeling_extensions/surrogate/sampling/pysmo_custom.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/explanations/modeling_extensions/surrogate/sampling/pysmo_custom.rst b/docs/explanations/modeling_extensions/surrogate/sampling/pysmo_custom.rst index d0cfef2ef4..401c7f299e 100644 --- a/docs/explanations/modeling_extensions/surrogate/sampling/pysmo_custom.rst +++ b/docs/explanations/modeling_extensions/surrogate/sampling/pysmo_custom.rst @@ -1,4 +1,4 @@ -Custom Sampling (LHS) +Custom Sampling =========================================== With this method, users can explicitly define the distribution for the sampling of each input variable. From 879d720bc267a92752afb9ac4a2c47ce3985911d Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Fri, 8 Dec 2023 11:13:19 -0800 Subject: [PATCH 09/19] Improve docstrings. 
--- idaes/core/surrogate/pysmo/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/idaes/core/surrogate/pysmo/sampling.py b/idaes/core/surrogate/pysmo/sampling.py index 3c04f904d7..2b4262bb97 100644 --- a/idaes/core/surrogate/pysmo/sampling.py +++ b/idaes/core/surrogate/pysmo/sampling.py @@ -1574,7 +1574,7 @@ def __init__( - When the aim is to generate a set of samples from a data range, the dataset must be a list containing two lists of equal lengths which contain the variable bounds and **sampling_type** option must be set to "creation". It is assumed that the range contains no output variable information in this case. number_of_samples(int): The number of samples to be generated. Should be a positive integer less than or equal to the number of entries (rows) in **data_input**. - list_of_distributions (list): The list containing the probability distribution for each variable. We currently support random, uniform and normal(i.e. Gaussian) distributions. + list_of_distributions (list): The list containing the probability distribution for each variable. The length of the list must match the number of input (i.e. independent) variables to be sampled. We currently support random, uniform and normal (i.e. Gaussian) distributions. sampling_type (str) : Option which determines whether the algorithm selects samples from an existing dataset ("selection") or attempts to generate sample from a supplied range ("creation"). Default is "creation". 
Keyword Args: From 6920ce3481bc0a00b15d35ed52ac6ec127528eaa Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Fri, 8 Dec 2023 21:12:23 -0800 Subject: [PATCH 10/19] Improving tests based on feedback --- idaes/core/surrogate/pysmo/sampling.py | 235 +++-- .../surrogate/pysmo/tests/test_sampling.py | 981 ++++++++++++------ 2 files changed, 797 insertions(+), 419 deletions(-) diff --git a/idaes/core/surrogate/pysmo/sampling.py b/idaes/core/surrogate/pysmo/sampling.py index 2b4262bb97..c60f618f42 100644 --- a/idaes/core/surrogate/pysmo/sampling.py +++ b/idaes/core/surrogate/pysmo/sampling.py @@ -501,11 +501,11 @@ def __init__( **self** function containing the input information Raises: - ValueError: The input data (**data_input**) is the wrong type. + ValueError: The input data (**data_input**) is the wrong type/dimension, or **number_of_samples** is invalid (too large, zero, or negative) - IndexError: When invalid column names are supplied in **xlabels** or **ylabels** + TypeError: When **number_of_samples** is not the right type, or **sampling_type** entry is not a string. - Exception: When **number_of_samples** is invalid (not an integer, too large, zero, or negative) + IndexError: When invalid column names are supplied in **xlabels** or **ylabels** """ @@ -514,14 +514,14 @@ def __init__( self.sampling_type = sampling_type print("Creation-type sampling will be used.") elif not isinstance(sampling_type, str): - raise Exception("Invalid sampling type entry. Must be of type .") + raise TypeError("Invalid sampling type entry. Must be of type .") elif (sampling_type.lower() == "creation") or ( sampling_type.lower() == "selection" ): sampling_type = sampling_type.lower() self.sampling_type = sampling_type else: - raise Exception( + raise ValueError( 'Invalid sampling type requirement entered. Enter "creation" for sampling from a range or "selection" for selecting samples from a dataset.' 
) print("Sampling type: ", self.sampling_type, "\n") @@ -541,13 +541,13 @@ def __init__( ) number_of_samples = 5 elif number_of_samples > self.data.shape[0]: - raise Exception( + raise ValueError( "LHS sample size cannot be greater than number of samples in the input data set" ) elif not isinstance(number_of_samples, int): - raise Exception("number_of_samples must be an integer.") + raise TypeError("number_of_samples must be an integer.") elif number_of_samples <= 0: - raise Exception("number_of_samples must a positive, non-zero integer.") + raise ValueError("number_of_samples must a positive, non-zero integer.") self.number_of_samples = number_of_samples elif self.sampling_type == "creation": @@ -555,16 +555,19 @@ def __init__( raise ValueError( 'List entry of two elements expected for sampling_type "creation."' ) - elif len(data_input) != 2: - raise Exception("data_input must contain two lists of equal lengths.") - elif not isinstance(data_input[0], list) or not isinstance( - data_input[1], list + elif ( + len(data_input) != 2 + or not isinstance(data_input[0], list) + or not isinstance(data_input[1], list) + or len(data_input[0]) != len(data_input[1]) ): - raise Exception("data_input must contain two lists of equal lengths.") - elif len(data_input[0]) != len(data_input[1]): - raise Exception("data_input must contain two lists of equal lengths.") + raise ValueError("data_input must contain two lists of equal lengths.") elif data_input[0] == data_input[1]: - raise Exception("Invalid entry: both lists are equal.") + raise ValueError("Invalid entry: both lists are equal.") + elif any(x == y for x, y in zip(data_input[0], data_input[1])): + raise ValueError( + "Invalid entry: at least one variable contains the same value for the lower and upper bounds." 
+ ) else: bounds_array = np.zeros( ( @@ -585,9 +588,9 @@ def __init__( ) number_of_samples = 5 elif not isinstance(number_of_samples, int): - raise Exception("number_of_samples must be an integer.") + raise TypeError("number_of_samples must be an integer.") elif number_of_samples <= 0: - raise Exception("number_of_samples must a positive, non-zero integer.") + raise ValueError("number_of_samples must a positive, non-zero integer.") self.number_of_samples = number_of_samples self.x_data = bounds_array # Only x data will be present in this case @@ -740,28 +743,26 @@ def __init__( **self** function containing the input information Raises: - ValueError: The **data_input** is the wrong type + ValueError: The **data_input** is the wrong type, or **list_of_samples_per_variable** is of the wrong length, or **list_of_samples_per_variable** is invalid. - ValueError: When **list_of_samples_per_variable** is of the wrong length, is not a list or contains elements other than integers + TypeError: When **list_of_samples_per_variable** is not a list, or **list_of_samples_per_variable** contains elements other than integers, **sampling_type** is not a string, or **edges** entry is not Boolean IndexError: When invalid column names are supplied in **xlabels** or **ylabels** - Exception: When **edges** entry is not Boolean - """ if sampling_type is None: sampling_type = "creation" self.sampling_type = sampling_type print("Creation-type sampling will be used.") elif not isinstance(sampling_type, str): - raise Exception("Invalid sampling type entry. Must be of type .") + raise TypeError("Invalid sampling type entry. Must be of type .") elif (sampling_type.lower() == "creation") or ( sampling_type.lower() == "selection" ): sampling_type = sampling_type.lower() self.sampling_type = sampling_type else: - raise Exception( + raise ValueError( 'Invalid sampling type requirement entered. Enter "creation" for sampling from a range or "selection" for selecting samples from a dataset.' 
) print("Sampling type: ", self.sampling_type, "\n") @@ -779,16 +780,19 @@ def __init__( raise ValueError( 'List entry of two elements expected for sampling_type "creation."' ) - elif len(data_input) != 2: - raise Exception("data_input must contain two lists of equal lengths.") - elif not isinstance(data_input[0], list) or not isinstance( - data_input[1], list + elif ( + len(data_input) != 2 + or not isinstance(data_input[0], list) + or not isinstance(data_input[1], list) + or len(data_input[0]) != len(data_input[1]) ): - raise Exception("data_input must contain two lists of equal lengths.") - elif len(data_input[0]) != len(data_input[1]): - raise Exception("data_input must contain two lists of equal lengths.") + raise ValueError("data_input must contain two lists of equal lengths.") elif data_input[0] == data_input[1]: - raise Exception("Invalid entry: both lists are equal.") + raise ValueError("Invalid entry: both lists are equal.") + elif any(x == y for x, y in zip(data_input[0], data_input[1])): + raise ValueError( + "Invalid entry: at least one variable contains the same value for the lower and upper bounds." + ) else: bounds_array = np.zeros( ( @@ -807,7 +811,7 @@ def __init__( edges = True self.edge = edges elif not isinstance(edges, bool): - raise Exception('Invalid "edges" entry. Must be boolean') + raise TypeError('Invalid "edges" entry. Must be boolean') elif (edges is True) or (edges is False): self.edge = edges @@ -832,7 +836,7 @@ def __init__( self.sampling_type == "selection" and self.number_of_samples > self.data.shape[0] ): - raise Exception( + raise ValueError( "Sample size cannot be greater than number of samples in the input data set" ) @@ -922,11 +926,11 @@ def __init__( **self** function containing the input information. Raises: - ValueError: The **data_input** is the wrong type. 
+ ValueError: The input data (**data_input**) is the wrong type/dimension, or **number_of_samples** is invalid (too large, zero, or negative) - IndexError: When invalid column names are supplied in **xlabels** or **ylabels** + TypeError: When **number_of_samples** is not the right type, or **sampling_type** entry is not a string. - Exception: When the **number_of_samples** is invalid (not an integer, too large, zero or negative.) + IndexError: When invalid column names are supplied in **xlabels** or **ylabels** """ if sampling_type is None: @@ -934,14 +938,14 @@ def __init__( self.sampling_type = sampling_type print("Creation-type sampling will be used.") elif not isinstance(sampling_type, str): - raise Exception("Invalid sampling type entry. Must be of type .") + raise TypeError("Invalid sampling type entry. Must be of type .") elif (sampling_type.lower() == "creation") or ( sampling_type.lower() == "selection" ): sampling_type = sampling_type.lower() self.sampling_type = sampling_type else: - raise Exception( + raise ValueError( 'Invalid sampling type requirement entered. Enter "creation" for sampling from a range or "selection" for selecting samples from a dataset.' 
) print("Sampling type: ", self.sampling_type, "\n") @@ -961,30 +965,33 @@ def __init__( ) number_of_samples = 5 elif number_of_samples > self.data.shape[0]: - raise Exception( + raise ValueError( "Sample size cannot be greater than number of samples in the input data set" ) elif not isinstance(number_of_samples, int): - raise Exception("number_of_samples must be an integer.") + raise TypeError("number_of_samples must be an integer.") elif number_of_samples <= 0: - raise Exception("number_of_samples must a positive, non-zero integer.") + raise ValueError("number_of_samples must a positive, non-zero integer.") self.number_of_samples = number_of_samples elif self.sampling_type == "creation": if not isinstance(data_input, list): - raise ValueError( + raise TypeError( 'List entry of two elements expected for sampling_type "creation."' ) - elif len(data_input) != 2: - raise Exception("data_input must contain two lists of equal lengths.") - elif not isinstance(data_input[0], list) or not isinstance( - data_input[1], list + elif ( + len(data_input) != 2 + or not isinstance(data_input[0], list) + or not isinstance(data_input[1], list) + or len(data_input[0]) != len(data_input[1]) ): - raise Exception("data_input must contain two lists of equal lengths.") - elif len(data_input[0]) != len(data_input[1]): - raise Exception("data_input must contain two lists of equal lengths.") + raise ValueError("data_input must contain two lists of equal lengths.") elif data_input[0] == data_input[1]: - raise Exception("Invalid entry: both lists are equal.") + raise ValueError("Invalid entry: both lists are equal.") + elif any(x == y for x, y in zip(data_input[0], data_input[1])): + raise ValueError( + "Invalid entry: at least one variable contains the same value for the lower and upper bounds." 
+ ) else: bounds_array = np.zeros( ( @@ -1005,9 +1012,9 @@ def __init__( ) number_of_samples = 5 elif not isinstance(number_of_samples, int): - raise Exception("number_of_samples must be an integer.") + raise TypeError("number_of_samples must be an integer.") elif number_of_samples <= 0: - raise Exception("number_of_samples must a positive, non-zero integer.") + raise ValueError("number_of_samples must a positive, non-zero integer.") self.number_of_samples = number_of_samples self.x_data = bounds_array # Only x data will be present in this case @@ -1100,11 +1107,11 @@ def __init__( **self** function containing the input information. Raises: - ValueError: When **data_input** is the wrong type. + ValueError: The input data (**data_input**) is the wrong type/dimension, or **number_of_samples** is invalid (too large, zero, or negative) - IndexError: When invalid column names are supplied in **xlabels** or **ylabels** + TypeError: When **number_of_samples** is not the right type, or **sampling_type** entry is not a string. - Exception: When the **number_of_samples** is invalid (not an integer, too large, zero, negative) + IndexError: When invalid column names are supplied in **xlabels** or **ylabels** """ if sampling_type is None: @@ -1112,14 +1119,14 @@ def __init__( self.sampling_type = sampling_type print("Creation-type sampling will be used.") elif not isinstance(sampling_type, str): - raise Exception("Invalid sampling type entry. Must be of type .") + raise TypeError("Invalid sampling type entry. Must be of type .") elif (sampling_type.lower() == "creation") or ( sampling_type.lower() == "selection" ): sampling_type = sampling_type.lower() self.sampling_type = sampling_type else: - raise Exception( + raise ValueError( 'Invalid sampling type requirement entered. Enter "creation" for sampling from a range or "selection" for selecting samples from a dataset.' 
) print("Sampling type: ", self.sampling_type, "\n") @@ -1139,13 +1146,13 @@ def __init__( ) number_of_samples = 5 elif number_of_samples > self.data.shape[0]: - raise Exception( + raise ValueError( "Sample size cannot be greater than number of samples in the input data set" ) elif not isinstance(number_of_samples, int): - raise Exception("number_of_samples must be an integer.") + raise TypeError("number_of_samples must be an integer.") elif number_of_samples <= 0: - raise Exception("number_of_samples must a positive, non-zero integer.") + raise ValueError("number_of_samples must a positive, non-zero integer.") self.number_of_samples = number_of_samples elif self.sampling_type == "creation": @@ -1153,16 +1160,19 @@ def __init__( raise ValueError( 'List entry of two elements expected for sampling_type "creation."' ) - elif len(data_input) != 2: - raise Exception("data_input must contain two lists of equal lengths.") - elif not isinstance(data_input[0], list) or not isinstance( - data_input[1], list + elif ( + len(data_input) != 2 + or not isinstance(data_input[0], list) + or not isinstance(data_input[1], list) + or len(data_input[0]) != len(data_input[1]) ): - raise Exception("data_input must contain two lists of equal lengths.") - elif len(data_input[0]) != len(data_input[1]): - raise Exception("data_input must contain two lists of equal lengths.") + raise ValueError("data_input must contain two lists of equal lengths.") elif data_input[0] == data_input[1]: - raise Exception("Invalid entry: both lists are equal.") + raise ValueError("Invalid entry: both lists are equal.") + elif any(x == y for x, y in zip(data_input[0], data_input[1])): + raise ValueError( + "Invalid entry: at least one variable contains the same value for the lower and upper bounds." 
+ ) else: bounds_array = np.zeros( ( @@ -1183,9 +1193,9 @@ def __init__( ) number_of_samples = 5 elif not isinstance(number_of_samples, int): - raise Exception("number_of_samples must be an integer.") + raise TypeError("number_of_samples must be an integer.") elif number_of_samples <= 0: - raise Exception("number_of_samples must a positive, non-zero integer.") + raise ValueError("number_of_samples must a positive, non-zero integer.") self.number_of_samples = number_of_samples self.x_data = bounds_array # Only x data will be present in this case @@ -1284,13 +1294,15 @@ def __init__( Raises: - ValueError: When **data_input** is the wrong type. + ValueError: The input data (**data_input**) is the wrong type/dimension, or **number_of_samples** is invalid (too large, zero, or negative) - IndexError: When invalid column names are supplied in **xlabels** or **ylabels** + ValueError: When the tolerance specified is too loose (tolerance > 0.1) - Exception: When the **number_of_samples** is invalid (not an integer, too large, zero, negative) + TypeError: When **number_of_samples** is not the right type, or **sampling_type** entry is not a string + + IndexError: When invalid column names are supplied in **xlabels** or **ylabels** - Exception: When the tolerance specified is too loose (tolerance > 0.1) or invalid + Exception: When the tolerance specified is invalid warnings.warn: when the tolerance specified by the user is too tight (tolerance < :math:`10^{-9}`) @@ -1300,14 +1312,14 @@ def __init__( self.sampling_type = sampling_type print("Creation-type sampling will be used.") elif not isinstance(sampling_type, str): - raise Exception("Invalid sampling type entry. Must be of type .") + raise TypeError("Invalid sampling type entry. 
Must be of type .") elif (sampling_type.lower() == "creation") or ( sampling_type.lower() == "selection" ): sampling_type = sampling_type.lower() self.sampling_type = sampling_type else: - raise Exception( + raise ValueError( 'Invalid sampling type requirement entered. Enter "creation" for sampling from a range or "selection" for selecting samples from a dataset.' ) print("Sampling type: ", self.sampling_type, "\n") @@ -1327,13 +1339,13 @@ def __init__( ) number_of_samples = 5 elif number_of_samples > self.data.shape[0]: - raise Exception( + raise ValueError( "CVT sample size cannot be greater than number of samples in the input data set" ) elif not isinstance(number_of_samples, int): - raise Exception("number_of_samples must be an integer.") + raise TypeError("number_of_samples must be an integer.") elif number_of_samples <= 0: - raise Exception("number_of_samples must a positive, non-zero integer.") + raise ValueError("number_of_samples must a positive, non-zero integer.") self.number_of_centres = number_of_samples elif self.sampling_type == "creation": @@ -1341,16 +1353,19 @@ def __init__( raise ValueError( 'List entry of two elements expected for sampling_type "creation."' ) - elif len(data_input) != 2: - raise Exception("data_input must contain two lists of equal lengths.") - elif not isinstance(data_input[0], list) or not isinstance( - data_input[1], list + elif ( + len(data_input) != 2 + or not isinstance(data_input[0], list) + or not isinstance(data_input[1], list) + or len(data_input[0]) != len(data_input[1]) ): - raise Exception("data_input must contain two lists of equal lengths.") - elif len(data_input[0]) != len(data_input[1]): - raise Exception("data_input must contain two lists of equal lengths.") + raise ValueError("data_input must contain two lists of equal lengths.") elif data_input[0] == data_input[1]: - raise Exception("Invalid entry: both lists are equal.") + raise ValueError("Invalid entry: both lists are equal.") + elif any(x == y for x, y in 
zip(data_input[0], data_input[1])): + raise ValueError( + "Invalid entry: at least one variable contains the same value for the lower and upper bounds." + ) else: bounds_array = np.zeros( ( @@ -1371,9 +1386,9 @@ def __init__( ) number_of_samples = 5 elif not isinstance(number_of_samples, int): - raise Exception("number_of_samples must be an integer.") + raise TypeError("number_of_samples must be an integer.") elif number_of_samples <= 0: - raise Exception("number_of_samples must a positive, non-zero integer.") + raise ValueError("number_of_samples must a positive, non-zero integer.") self.number_of_centres = number_of_samples x_data = bounds_array # Only x data will be present in this case @@ -1385,7 +1400,7 @@ def __init__( if tolerance is None: tolerance = 1e-7 elif tolerance > 0.1: - raise Exception("Tolerance must be less than 0.1 to achieve good results") + raise ValueError("Tolerance must be less than 0.1 to achieve good results") elif tolerance < 1e-9: warnings.warn( "Tolerance too tight. CVT algorithm may take long time to converge." @@ -1585,13 +1600,12 @@ def __init__( **self** function containing the input information Raises: - ValueError: The **data_input** is the wrong type + ValueError: The input data (**data_input**) is the wrong type/dimension, or **number_of_samples** is invalid (too large, zero, or negative), **list_of_distributions** is the wrong length, or a non-implemented distribution is supplied in **list_of_distributions**. 
- ValueError: When a non-implemented distribution is supplied in list_of_distributions + TypeError: When **number_of_samples** is not an integer, **list_of_distributions** is not a list, or **sampling_type** entry is not a string IndexError: When invalid column names are supplied in **xlabels** or **ylabels** - Exception: When the **number_of_samples** is invalid (not an integer, too large, zero, negative) """ @@ -1600,14 +1614,14 @@ def __init__( self.sampling_type = sampling_type print("Creation-type sampling will be used.") elif not isinstance(sampling_type, str): - raise Exception("Invalid sampling type entry. Must be of type .") + raise TypeError("Invalid sampling type entry. Must be of type .") elif (sampling_type.lower() == "creation") or ( sampling_type.lower() == "selection" ): sampling_type = sampling_type.lower() self.sampling_type = sampling_type else: - raise Exception( + raise ValueError( 'Invalid sampling type requirement entered. Enter "creation" for sampling from a range or "selection" for selecting samples from a dataset.' 
) print("Sampling type: ", self.sampling_type, "\n") @@ -1627,13 +1641,13 @@ def __init__( ) number_of_samples = 5 elif number_of_samples > self.data.shape[0]: - raise Exception( + raise ValueError( "Sample size cannot be greater than number of samples in the input data set" ) elif not isinstance(number_of_samples, int): - raise Exception("number_of_samples must be an integer.") + raise TypeError("number_of_samples must be an integer.") elif number_of_samples <= 0: - raise Exception("number_of_samples must a positive, non-zero integer.") + raise ValueError("number_of_samples must a positive, non-zero integer.") self.number_of_samples = number_of_samples elif self.sampling_type == "creation": @@ -1641,16 +1655,19 @@ def __init__( raise ValueError( 'List entry of two elements expected for sampling_type "creation."' ) - elif len(data_input) != 2: - raise Exception("data_input must contain two lists of equal lengths.") - elif not isinstance(data_input[0], list) or not isinstance( - data_input[1], list + elif ( + len(data_input) != 2 + or not isinstance(data_input[0], list) + or not isinstance(data_input[1], list) + or len(data_input[0]) != len(data_input[1]) ): - raise Exception("data_input must contain two lists of equal lengths.") - elif len(data_input[0]) != len(data_input[1]): - raise Exception("data_input must contain two lists of equal lengths.") + raise ValueError("data_input must contain two lists of equal lengths.") elif data_input[0] == data_input[1]: - raise Exception("Invalid entry: both lists are equal.") + raise ValueError("Invalid entry: both lists are equal.") + elif any(x == y for x, y in zip(data_input[0], data_input[1])): + raise ValueError( + "Invalid entry: at least one variable contains the same value for the lower and upper bounds." 
+ ) else: bounds_array = np.zeros( ( @@ -1671,9 +1688,9 @@ def __init__( ) number_of_samples = 5 elif not isinstance(number_of_samples, int): - raise Exception("number_of_samples must be an integer.") + raise TypeError("number_of_samples must be an integer.") elif number_of_samples <= 0: - raise Exception("number_of_samples must a positive, non-zero integer.") + raise ValueError("number_of_samples must a positive, non-zero integer.") self.number_of_samples = number_of_samples self.x_data = bounds_array # Only x data will be present in this case diff --git a/idaes/core/surrogate/pysmo/tests/test_sampling.py b/idaes/core/surrogate/pysmo/tests/test_sampling.py index 0dbd01d676..c112fe7da5 100644 --- a/idaes/core/surrogate/pysmo/tests/test_sampling.py +++ b/idaes/core/surrogate/pysmo/tests/test_sampling.py @@ -470,7 +470,7 @@ class TestLatinHypercubeSampling: @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_01(self, array_type): + def test__init__selection_right_behaviour_with_none_no_samples(self, array_type): input_array = array_type(self.input_array) LHSClass = LatinHypercubeSampling( input_array, number_of_samples=None, sampling_type="selection" @@ -481,7 +481,9 @@ def test__init__selection_01(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_02(self, array_type): + def test__init__selection_right_behaviour_with_specified_no_samples( + self, array_type + ): input_array = array_type(self.input_array) LHSClass = LatinHypercubeSampling( input_array, number_of_samples=6, sampling_type="selection" @@ -492,52 +494,62 @@ def test__init__selection_02(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_03(self, array_type): + def test__init__selection_zero_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): 
+ with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." + ): LHSClass = LatinHypercubeSampling( input_array, number_of_samples=0, sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_04(self, array_type): + def test__init__selection_negative_no_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." + ): LHSClass = LatinHypercubeSampling( input_array, number_of_samples=-1, sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_05(self, array_type): + def test__init__selection_excess_no_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, + match="LHS sample size cannot be greater than number of samples in the input data set", + ): LHSClass = LatinHypercubeSampling( input_array, number_of_samples=101, sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_06(self, array_type): + def test__init__selection_non_integer_no_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises(TypeError, match="number_of_samples must be an integer."): LHSClass = LatinHypercubeSampling( input_array, number_of_samples=1.1, sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__selection_07(self, array_type): + def test__init__selection_wrong_input_data_type(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match='Pandas dataframe or numpy array required for 
sampling_type "selection."', + ): LHSClass = LatinHypercubeSampling( input_array, number_of_samples=None, sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_01(self, array_type): + def test__init__creation_right_behaviour_with_none_samplingtype(self, array_type): input_array = array_type(self.input_array_list) LHSClass = LatinHypercubeSampling( input_array, number_of_samples=None, sampling_type=None @@ -547,7 +559,7 @@ def test__init__creation_01(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_02(self, array_type): + def test__init__creation_right_behaviour_with_none_no_samples(self, array_type): input_array = array_type(self.input_array_list) LHSClass = LatinHypercubeSampling( input_array, number_of_samples=None, sampling_type="creation" @@ -557,7 +569,9 @@ def test__init__creation_02(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_03(self, array_type): + def test__init__creation_right_behaviour_with_specified_no_samples( + self, array_type + ): input_array = array_type(self.input_array_list) LHSClass = LatinHypercubeSampling( input_array, number_of_samples=100, sampling_type="creation" @@ -567,36 +581,43 @@ def test__init__creation_03(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_04(self, array_type): + def test__init__creation_zero_samples(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." 
+ ): LHSClass = LatinHypercubeSampling( input_array, number_of_samples=0, sampling_type="creation" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_05(self, array_type): + def test__init__creation_negative_no_samples(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." + ): LHSClass = LatinHypercubeSampling( input_array, number_of_samples=-1, sampling_type="creation" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_06(self, array_type): + def test__init__creation_non_integer_no_samples(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises(TypeError, match="number_of_samples must be an integer."): LHSClass = LatinHypercubeSampling( input_array, number_of_samples=1.1, sampling_type="creation" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__creation_07(self, array_type): + def test__init__creation_wrong_input_data_type(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match='List entry of two elements expected for sampling_type "creation."', + ): LHSClass = LatinHypercubeSampling( input_array, number_of_samples=None, sampling_type="creation" ) @@ -611,63 +632,90 @@ def test__init__creation_08(self, array_type): ) @pytest.mark.unit - def test__init__creation_09(self): + def test__init__creation_missing_bounds(self): input_array = [[2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." 
+ ): LHSClass = LatinHypercubeSampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_10(self): + def test__init__creation_wrong_data_input_format_lb(self): input_array = [np.array([1, 10, 3]), [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." + ): LHSClass = LatinHypercubeSampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_11(self): + def test__init__creation_wrong_data_input_format_ub(self): input_array = [[1, 10, 3], np.array([2, 11, 4.5])] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." + ): LHSClass = LatinHypercubeSampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_12(self): + def test__init__creation_unequal_length_list_bounds(self): input_array = [[1, 10], [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." 
+ ): LHSClass = LatinHypercubeSampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_13(self): + def test__init__creation_equal_input_output_bounds_all(self): input_array = [[2, 11, 4.5], [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises(ValueError, match="Invalid entry: both lists are equal."): LHSClass = LatinHypercubeSampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__creation_selection_01(self, array_type): + def test__init__samplingtype_nonstring(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + TypeError, match="Invalid sampling type entry. Must be of type ." + ): LHSClass = LatinHypercubeSampling( input_array, number_of_samples=None, sampling_type=1 ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__creation_selection_02(self, array_type): + def test__init__samplingtype_undefined_string(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, + match='Invalid sampling type requirement entered. 
Enter "creation" for sampling from a range or "selection" for selecting samples from a dataset.', + ): LHSClass = LatinHypercubeSampling( input_array, number_of_samples=None, sampling_type="jp" ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__test_single_equal_ub_lb(self, array_type): + input_array = array_type([[0, 0, 0], [0, 1, 1]]) + with pytest.raises( + ValueError, + match="Invalid entry: at least one variable contains the same value for the lower and upper bounds.", + ): + LHSClass = LatinHypercubeSampling( + input_array, + number_of_samples=None, + sampling_type="creation", + ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test_variable_sample_creation(self, array_type): @@ -811,7 +859,7 @@ class TestUniformSampling: @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_01(self, array_type): + def test__init__selection_right_behaviour(self, array_type): input_array = array_type(self.input_array) UniClass = UniformSampling(input_array, [2, 5], sampling_type="selection") np.testing.assert_array_equal(UniClass.data, input_array) @@ -820,62 +868,90 @@ def test__init__selection_01(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_02(self, array_type): + def test__init__selection_wrong_type_for_list_of_samples_per_variable_01( + self, array_type + ): input_array = array_type(self.input_array) - with pytest.raises(TypeError): + with pytest.raises( + TypeError, match="list_of_samples_per_variable: list required." 
+ ): UniClass = UniformSampling( input_array, np.array([2, 5]), sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_03(self, array_type): + def test__init__selection_wrong_type_for_list_of_samples_per_variable_02( + self, array_type + ): input_array = array_type(self.input_array) - with pytest.raises(TypeError): + with pytest.raises( + TypeError, match="list_of_samples_per_variable: list required." + ): UniClass = UniformSampling( input_array, pd.DataFrame([2, 5]), sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_04(self, array_type): + def test__init__selection_wrong_length_for_list_of_samples_per_variable_01( + self, array_type + ): input_array = array_type(self.input_array) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match="Length of list_of_samples_per_variable must equal the number of variables.", + ): UniClass = UniformSampling(input_array, [2], sampling_type="selection") @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_05(self, array_type): + def test__init__selection_wrong_length_for_list_of_samples_per_variable_02( + self, array_type + ): input_array = array_type(self.input_array) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match="Length of list_of_samples_per_variable must equal the number of variables.", + ): UniClass = UniformSampling( input_array, [2, 5, 5], sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_06(self, array_type): + def test__init__selection_negative_entry_in_list_of_samples_per_variable( + self, array_type + ): input_array = array_type(self.input_array) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match="All variables must have at 
least two points per dimension", + ): UniClass = UniformSampling(input_array, [-2, 5], sampling_type="selection") @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_07(self, array_type): + def test__init__selection_fractional_entry_in_list_of_samples_per_variable( + self, array_type + ): input_array = array_type(self.input_array) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match="All values in list must be integers"): UniClass = UniformSampling(input_array, [2.1, 5], sampling_type="selection") @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_08(self, array_type): + def test__init__selection_excess_no_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, + match="Sample size cannot be greater than number of samples in the input data set", + ): UniClass = UniformSampling(input_array, [2, 50], sampling_type="selection") @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_09(self, array_type): + def test__init__selection_assert_correct_behaviour_edge_true(self, array_type): input_array = array_type(self.input_array) UniClass = UniformSampling( input_array, [2, 5], sampling_type="selection", edges=True @@ -883,10 +959,11 @@ def test__init__selection_09(self, array_type): np.testing.assert_array_equal(UniClass.data, input_array) np.testing.assert_array_equal(UniClass.number_of_samples, 10) np.testing.assert_array_equal(UniClass.x_data, np.array(input_array)[:, :-1]) + assert UniClass.edge == True @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_10(self, array_type): + def test__init__selection_assert_correct_behaviour_edge_false(self, array_type): input_array = array_type(self.input_array) UniClass = UniformSampling( 
input_array, [2, 5], sampling_type="selection", edges=False @@ -894,35 +971,39 @@ def test__init__selection_10(self, array_type): np.testing.assert_array_equal(UniClass.data, input_array) np.testing.assert_array_equal(UniClass.number_of_samples, 10) np.testing.assert_array_equal(UniClass.x_data, np.array(input_array)[:, :-1]) + assert UniClass.edge == False @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_11(self, array_type): + def test__init__selection_nonboolean_edge_entry_01(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises(TypeError, match='Invalid "edges" entry. Must be boolean'): UniClass = UniformSampling( input_array, [2, 5], sampling_type="selection", edges=1 ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_12(self, array_type): + def test__init__selection_nonboolean_edge_entry_02(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises(TypeError, match='Invalid "edges" entry. 
Must be boolean'): UniClass = UniformSampling( input_array, [2, 5], sampling_type="selection", edges="x" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__selection_13(self, array_type): + def test__init__selection_wrong_input_data_type(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match='Pandas dataframe or numpy array required for sampling_type "selection."', + ): UniClass = UniformSampling(input_array, [2, 5], sampling_type="selection") @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_01(self, array_type): + def test__init__creation_right_behaviour_with_none_samplingtype(self, array_type): input_array = array_type(self.input_array_list) UniClass = UniformSampling(input_array, [2, 7, 5], sampling_type=None) np.testing.assert_array_equal(UniClass.data, input_array) @@ -930,7 +1011,7 @@ def test__init__creation_01(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_02(self, array_type): + def test__init__creation_right_behaviour_with_specified_sampling(self, array_type): input_array = array_type(self.input_array_list) UniClass = UniformSampling(input_array, [2, 7, 5], sampling_type="creation") np.testing.assert_array_equal(UniClass.data, input_array) @@ -938,80 +1019,125 @@ def test__init__creation_02(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_03(self, array_type): + def test__init__creation_wrong_entry_in_list_of_samples_per_variable( + self, array_type + ): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises( + ValueError, + match="All variables must have at least two points per dimension", + ): UniClass = UniformSampling(input_array, [1, 7, 5], sampling_type="creation") @pytest.mark.unit 
@pytest.mark.parametrize("array_type", [list]) - def test__init__creation_04(self, array_type): + def test__init__creation__negative_entry_in_list_of_samples_per_variable( + self, array_type + ): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises( + ValueError, + match="All variables must have at least two points per dimension", + ): UniClass = UniformSampling( input_array, [-1, 7, 5], sampling_type="creation" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_05(self, array_type): + def test__init__creation_invalid_entry_in_list_of_samples_per_variable( + self, array_type + ): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises( + ValueError, + match="All variables must have at least two points per dimension", + ): UniClass = UniformSampling( input_array, [1.1, 7, 5], sampling_type="creation" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__creation_06(self, array_type): + def test__init__creation_wrong_input_data_type(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match='List entry of two elements expected for sampling_type "creation."', + ): UniClass = UniformSampling(input_array, [2, 5], sampling_type="creation") @pytest.mark.unit - def test__init__creation_08(self): + def test__init__creation_missing_bounds(self): input_array = [[2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." 
+ ): UniClass = UniformSampling(input_array, [2, 7, 5], sampling_type=None) @pytest.mark.unit - def test__init__creation_09(self): + def test__init__creation_wrong_data_input_format_lb(self): input_array = [np.array([1, 10, 3]), [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." + ): UniClass = UniformSampling(input_array, [2, 7, 5], sampling_type=None) @pytest.mark.unit - def test__init__creation_10(self): + def test__init__creation_wrong_data_input_format_ub(self): input_array = [[1, 10, 3], np.array([2, 11, 4.5])] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." + ): UniClass = UniformSampling(input_array, [2, 7, 5], sampling_type=None) @pytest.mark.unit - def test__init__creation_11(self): + def test__init__creation_unequal_length_list_bounds(self): input_array = [[1, 10], [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." + ): UniClass = UniformSampling(input_array, [2, 7, 5], sampling_type=None) @pytest.mark.unit - def test__init__creation_12(self): + def est__init__creation_equal_input_output_bounds_all(self): input_array = [[2, 11, 4.5], [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises(ValueError, match="Invalid entry: both lists are equal."): UniClass = UniformSampling(input_array, [2, 7, 5], sampling_type=None) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__creation_selection_01(self, array_type): + def test__init__samplingtype_nonstring(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + TypeError, match="Invalid sampling type entry. Must be of type ." 
+ ): UniClass = UniformSampling(input_array, [2, 5], sampling_type=1) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__creation_selection_02(self, array_type): + def test__init__samplingtype_undefined_string(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, + match='Invalid sampling type requirement entered. Enter "creation" for sampling from a range or "selection" for selecting samples from a dataset.', + ): UniClass = UniformSampling(input_array, [2, 5], sampling_type="jp") + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_equal_input_output_bounds_one(self, array_type): + input_array = array_type([[0, 0, 0], [0, 1, 1]]) + with pytest.raises( + ValueError, + match="Invalid entry: at least one variable contains the same value for the lower and upper bounds.", + ): + UniClass = UniformSampling( + input_array, + [2, 7, 5], + sampling_type="creation", + ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) def test_sample_points_01(self, array_type): @@ -1148,7 +1274,7 @@ class TestHaltonSampling: @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_01(self, array_type): + def test__init__selection_right_behaviour_with_none_no_samples(self, array_type): input_array = array_type(self.input_array) HaltonClass = HaltonSampling( input_array, number_of_samples=None, sampling_type="selection" @@ -1159,7 +1285,9 @@ def test__init__selection_01(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_02(self, array_type): + def test__init__selection_right_behaviour_with_specified_no_samples( + self, array_type + ): input_array = array_type(self.input_array) HaltonClass = HaltonSampling( input_array, number_of_samples=6, sampling_type="selection" @@ 
-1170,79 +1298,97 @@ def test__init__selection_02(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_03(self, array_type): + def test__init__selection_zero_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." + ): HaltonClass = HaltonSampling( input_array, number_of_samples=0, sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_04(self, array_type): + def test__init__selection_negative_no_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." + ): HaltonClass = HaltonSampling( input_array, number_of_samples=-1, sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_05(self, array_type): + def test__init__selection_excess_no_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, + match="Sample size cannot be greater than number of samples in the input data set", + ): HaltonClass = HaltonSampling( input_array, number_of_samples=101, sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_06(self, array_type): + def test__init__selection_non_integer_no_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises(TypeError, match="number_of_samples must be an integer."): HaltonClass = HaltonSampling( input_array, number_of_samples=1.1, sampling_type="selection" ) @pytest.mark.unit 
@pytest.mark.parametrize("array_type", [list]) - def test__init__selection_07(self, array_type): + def test__init__selection_wrong_input_data_type(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match='Pandas dataframe or numpy array required for sampling_type "selection."', + ): HaltonClass = HaltonSampling( input_array, number_of_samples=None, sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_08(self, array_type): + def test__init__samplingtype_nonstring(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + TypeError, match="Invalid sampling type entry. Must be of type ." + ): HaltonClass = HaltonSampling( input_array, number_of_samples=None, sampling_type=[1, 2, 3] ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_09(self, array_type): + def test__init__samplingtype_undefined_string(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, + match='Invalid sampling type requirement entered. 
Enter "creation" for sampling from a range or "selection" for selecting samples from a dataset.', + ): HaltonClass = HaltonSampling( input_array, number_of_samples=None, sampling_type="choose" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_10(self, array_type): + def test__init__selection_method_dimensionality_exceeded(self, array_type): input_array = array_type(self.input_array_high) - with pytest.raises(Exception): + with pytest.raises( + Exception, + match="Dimensionality problem: This method is not available for problems with dimensionality > 10: the performance of the method degrades substantially at higher dimensions", + ): HaltonClass = HaltonSampling( input_array, number_of_samples=None, sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_01(self, array_type): + def test__init__creation_right_behaviour_with_none_samplingtype(self, array_type): input_array = array_type(self.input_array_list) HaltonClass = HaltonSampling( input_array, number_of_samples=None, sampling_type=None @@ -1252,7 +1398,7 @@ def test__init__creation_01(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_02(self, array_type): + def test__init__creation_right_hahaviour_with_none_no_samples(self, array_type): input_array = array_type(self.input_array_list) HaltonClass = HaltonSampling( input_array, number_of_samples=None, sampling_type="creation" @@ -1262,7 +1408,9 @@ def test__init__creation_02(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_03(self, array_type): + def test__init__creation_right_hahaviour_with_specified_no_samples( + self, array_type + ): input_array = array_type(self.input_array_list) HaltonClass = HaltonSampling( input_array, number_of_samples=100, sampling_type="creation" @@ -1272,89 +1420,121 @@ def 
test__init__creation_03(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_04(self, array_type): + def test__init__creation_zero_samples(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." + ): HaltonClass = HaltonSampling( input_array, number_of_samples=0, sampling_type="creation" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_05(self, array_type): + def test__init__creation_negative_no_samples(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." + ): HaltonClass = HaltonSampling( input_array, number_of_samples=-1, sampling_type="creation" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_06(self, array_type): + def test__init__creation_non_integer_no_samples(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises(TypeError, match="number_of_samples must be an integer."): HaltonClass = HaltonSampling( input_array, number_of_samples=1.1, sampling_type="creation" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__creation_07(self, array_type): + def test__init__creation_wrong_input_data_type(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(ValueError): + with pytest.raises( + TypeError, + match='List entry of two elements expected for sampling_type "creation."', + ): HaltonClass = HaltonSampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_09(self): + def test__init__creation_missing_bounds(self): input_array = [[2, 11, 
4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." + ): HaltonClass = HaltonSampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_10(self): + def test__init__creation_wrong_data_input_format_lb(self): input_array = [np.array([1, 10, 3]), [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." + ): HaltonClass = HaltonSampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_11(self): + def test__init__creation_wrong_data_input_format_ub(self): input_array = [[1, 10, 3], np.array([2, 11, 4.5])] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." + ): HaltonClass = HaltonSampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_12(self): + def test__init__creation_unequal_length_list_bounds(self): input_array = [[1, 10], [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." 
+ ): HaltonClass = HaltonSampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_13(self): + def test__init__creation_equal_input_output_bounds_all(self): input_array = [[2, 11, 4.5], [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises(ValueError, match="Invalid entry: both lists are equal."): HaltonClass = HaltonSampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection(self, array_type): + def test__init__selection_dimensionality_exceeded(self, array_type): input_array = array_type(self.input_array_high) - with pytest.raises(Exception): + with pytest.raises( + Exception, + match="Dimensionality problem: This method is not available for problems with dimensionality > 10: the performance of the method degrades substantially at higher dimensions", + ): HaltonClass = HaltonSampling( input_array, number_of_samples=None, sampling_type="selection" ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__test_single_equal_ub_lb(self, array_type): + input_array = array_type([[0, 0, 0], [0, 1, 1]]) + with pytest.raises( + ValueError, + match="Invalid entry: at least one variable contains the same value for the lower and upper bounds.", + ): + HaltonClass = HaltonSampling( + input_array, + number_of_samples=5, + sampling_type="creation", + ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) def test_sample_points_01(self, array_type): @@ -1469,7 +1649,7 @@ class TestHammersleySampling: @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_01(self, array_type): + def test__init__selection_right_behaviour_with_none_no_samples(self, array_type): input_array = array_type(self.input_array) HammersleyClass = HammersleySampling( input_array, number_of_samples=None, 
sampling_type="selection" @@ -1482,7 +1662,9 @@ def test__init__selection_01(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_02(self, array_type): + def test__init__selection_right_behaviour_with_specified_no_samples( + self, array_type + ): input_array = array_type(self.input_array) HammersleyClass = HammersleySampling( input_array, number_of_samples=6, sampling_type="selection" @@ -1495,79 +1677,97 @@ def test__init__selection_02(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_03(self, array_type): + def test__init__selection_zero_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." + ): HammersleyClass = HammersleySampling( input_array, number_of_samples=0, sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_04(self, array_type): + def test__init__selection_negative_no_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." 
+ ): HammersleyClass = HammersleySampling( input_array, number_of_samples=-1, sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_05(self, array_type): + def test__init__selection_excess_no_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, + match="Sample size cannot be greater than number of samples in the input data set", + ): HammersleyClass = HammersleySampling( input_array, number_of_samples=101, sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_06(self, array_type): + def test__init__selection_non_integer_no_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises(TypeError, match="number_of_samples must be an integer."): HammersleyClass = HammersleySampling( input_array, number_of_samples=1.1, sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__selection_07(self, array_type): + def test__init__selection_wrong_input_data_type(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match='Pandas dataframe or numpy array required for sampling_type "selection."', + ): HammersleyClass = HammersleySampling( input_array, number_of_samples=None, sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_08(self, array_type): + def test__init__samplingtype_nonstring(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + TypeError, match="Invalid sampling type entry. Must be of type ." 
+ ): HammersleyClass = HammersleySampling( input_array, number_of_samples=None, sampling_type=[1, 2, 3] ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_09(self, array_type): + def test__init__samplingtype_undefined_string(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, + match='Invalid sampling type requirement entered. Enter "creation" for sampling from a range or "selection" for selecting samples from a dataset.', + ): HammersleyClass = HammersleySampling( input_array, number_of_samples=None, sampling_type="choose" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_10(self, array_type): + def test__init__selection_method_dimensionality_exceeded(self, array_type): input_array = array_type(self.input_array_high) - with pytest.raises(Exception): + with pytest.raises( + Exception, + match="Dimensionality problem: This method is not available for problems with dimensionality > 10: the performance of the method degrades substantially at higher dimensions", + ): HammersleyClass = HammersleySampling( input_array, number_of_samples=None, sampling_type="selection" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_01(self, array_type): + def test__init__creation_right_hahaviour_with_none_samplingtype(self, array_type): input_array = array_type(self.input_array_list) HammersleyClass = HammersleySampling( input_array, number_of_samples=None, sampling_type=None @@ -1577,7 +1777,7 @@ def test__init__creation_01(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_02(self, array_type): + def test__init__creation_right_behaviour_with_none_no_samples(self, array_type): input_array = array_type(self.input_array_list) HammersleyClass = HammersleySampling( input_array, 
number_of_samples=None, sampling_type="creation" @@ -1587,7 +1787,9 @@ def test__init__creation_02(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_03(self, array_type): + def test__init__creation_right_behaviour_with_specified_no_samples( + self, array_type + ): input_array = array_type(self.input_array_list) HammersleyClass = HammersleySampling( input_array, number_of_samples=100, sampling_type="creation" @@ -1597,89 +1799,121 @@ def test__init__creation_03(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_04(self, array_type): + def test__init__creation_zero_samples(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." + ): HammersleyClass = HammersleySampling( input_array, number_of_samples=0, sampling_type="creation" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_05(self, array_type): + def test__init__creation_negative_no_samples(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." 
+ ): HammersleyClass = HammersleySampling( input_array, number_of_samples=-1, sampling_type="creation" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_06(self, array_type): + def test__init__creation_non_integer_no_samples(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises(TypeError, match="number_of_samples must be an integer."): HammersleyClass = HammersleySampling( input_array, number_of_samples=1.1, sampling_type="creation" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__creation_06(self, array_type): + def test__init__creation_wrong_input_data_type(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match='List entry of two elements expected for sampling_type "creation."', + ): HammersleyClass = HammersleySampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_09(self): + def test__init__creation_missing_bounds(self): input_array = [[2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." + ): HammersleyClass = HammersleySampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_10(self): + def test__init__creation_wrong_data_input_format_lb(self): input_array = [np.array([1, 10, 3]), [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." 
+ ): HammersleyClass = HammersleySampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_11(self): + def test__init__creation_wrong_data_input_format_ub(self): input_array = [[1, 10, 3], np.array([2, 11, 4.5])] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." + ): HammersleyClass = HammersleySampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_12(self): + def test__init__creation_unequal_length_list_bounds(self): input_array = [[1, 10], [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." + ): HammersleyClass = HammersleySampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_13(self): + def test__init__creation_equal_input_output_bounds_all(self): input_array = [[2, 11, 4.5], [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises(ValueError, match="Invalid entry: both lists are equal."): HammersleyClass = HammersleySampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection(self, array_type): + def test__init__selection_method_dimensionality_exceeded_large(self, array_type): input_array = array_type(self.input_array_large) - with pytest.raises(Exception): + with pytest.raises( + Exception, + match="Dimensionality problem: This method is not available for problems with dimensionality > 10: the performance of the method degrades substantially at higher dimensions", + ): HammersleyClass = HammersleySampling( input_array, number_of_samples=None, sampling_type="selection" ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def
test__init__creation_equal_input_output_bounds_one(self, array_type): + input_array = array_type([[0, 0, 0], [0, 1, 1]]) + with pytest.raises( + ValueError, + match="Invalid entry: at least one variable contains the same value for the lower and upper bounds.", + ): + HammersleyClass = HammersleySampling( + input_array, + number_of_samples=5, + sampling_type=None, + ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) def test_sample_points_01(self, array_type): @@ -1752,7 +1986,7 @@ class TestCVTSampling: @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_01(self, array_type): + def test__init__selection_right_behaviour_with_none_no_samples(self, array_type): input_array = array_type(self.input_array) CVTClass = CVTSampling( input_array, @@ -1767,7 +2001,9 @@ def test__init__selection_01(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_02(self, array_type): + def test__init__selection_right_behaviour_with_specified_no_samples( + self, array_type + ): input_array = array_type(self.input_array) CVTClass = CVTSampling( input_array, number_of_samples=6, tolerance=None, sampling_type="selection" @@ -1779,9 +2015,11 @@ def test__init__selection_02(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_03(self, array_type): + def test__init__selection_zero_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." 
+ ): CVTClass = CVTSampling( input_array, number_of_samples=0, @@ -1791,9 +2029,11 @@ def test__init__selection_03(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_04(self, array_type): + def test__init__selection_negative_no_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." + ): CVTClass = CVTSampling( input_array, number_of_samples=-1, @@ -1803,9 +2043,12 @@ def test__init__selection_04(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_05(self, array_type): + def test__init__selection_excess_no_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, + match="CVT sample size cannot be greater than number of samples in the input data set", + ): CVTClass = CVTSampling( input_array, number_of_samples=101, @@ -1815,9 +2058,9 @@ def test__init__selection_05(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_06(self, array_type): + def test__init__selection_non_integer_no_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises(TypeError, match="number_of_samples must be an integer."): CVTClass = CVTSampling( input_array, number_of_samples=1.1, @@ -1827,9 +2070,12 @@ def test__init__selection_06(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__selection_07(self, array_type): + def test__init__selection_wrong_input_data_type(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match='Pandas dataframe or 
numpy array required for sampling_type "selection."', + ): CVTClass = CVTSampling( input_array, number_of_samples=None, @@ -1839,9 +2085,11 @@ def test__init__selection_07(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_08(self, array_type): + def test__init__selection_tolerance_too_loose(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="Tolerance must be less than 0.1 to achieve good results" + ): CVTClass = CVTSampling( input_array, number_of_samples=None, @@ -1851,9 +2099,12 @@ def test__init__selection_08(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_09(self, array_type): + def test__init__selection_tolerance_too_tight(self, array_type): input_array = array_type(self.input_array) - with pytest.warns(Warning): + with pytest.warns( + Warning, + match="Tolerance too tight. 
CVT algorithm may take long time to converge.", + ): CVTClass = CVTSampling( input_array, number_of_samples=None, @@ -1863,7 +2114,7 @@ def test__init__selection_09(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_10(self, array_type): + def test__init__selection_valid_tolerance(self, array_type): input_array = array_type(self.input_array) CVTClass = CVTSampling( input_array, @@ -1873,6 +2124,18 @@ def test__init__selection_10(self, array_type): ) np.testing.assert_array_equal(CVTClass.eps, 0.09) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_none_tolerance(self, array_type): + input_array = array_type(self.input_array) + CVTClass = CVTSampling( + input_array, + number_of_samples=None, + tolerance=None, + sampling_type="selection", + ) + np.testing.assert_array_equal(CVTClass.eps, 1e-7) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_11(self, array_type): @@ -1887,17 +2150,18 @@ def test__init__selection_11(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_01(self, array_type): + def test__init__creation_right_behaviour_with_none_samplingtype(self, array_type): input_array = array_type(self.input_array_list) CVTClass = CVTSampling( input_array, number_of_samples=None, tolerance=None, sampling_type=None ) np.testing.assert_array_equal(CVTClass.data, input_array) np.testing.assert_array_equal(CVTClass.number_of_centres, 5) + assert CVTClass.sampling_type == "creation" @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_02(self, array_type): + def test__init__creation_right_behaviour_with_none_no_samples(self, array_type): input_array = array_type(self.input_array_list) CVTClass = CVTSampling( input_array, @@ -1910,7 +2174,9 @@ def test__init__creation_02(self, 
array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_03(self, array_type): + def test__init__creation_right_behaviour_with_specified_no_samples( + self, array_type + ): input_array = array_type(self.input_array_list) CVTClass = CVTSampling( input_array, number_of_samples=100, tolerance=None, sampling_type="creation" @@ -1920,9 +2186,11 @@ def test__init__creation_03(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_04(self, array_type): + def test__init__creation_zero_samples(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." + ): CVTClass = CVTSampling( input_array, number_of_samples=0, @@ -1932,9 +2200,11 @@ def test__init__creation_04(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_05(self, array_type): + def test__init__creation_negative_no_samples(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." 
+ ): CVTClass = CVTSampling( input_array, number_of_samples=-1, @@ -1944,9 +2214,9 @@ def test__init__creation_05(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_06(self, array_type): + def test__init__creation_non_integer_no_samples(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises(TypeError, match="number_of_samples must be an integer."): CVTClass = CVTSampling( input_array, number_of_samples=1.1, @@ -1956,9 +2226,12 @@ def test__init__creation_06(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__creation_07(self, array_type): + def test__init__creation_wrong_input_data_type(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match='List entry of two elements expected for sampling_type "creation."', + ): CVTClass = CVTSampling( input_array, number_of_samples=None, @@ -1968,9 +2241,11 @@ def test__init__creation_07(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_08(self, array_type): + def test__init__creation_test_tolerance_too_loose(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="Tolerance must be less than 0.1 to achieve good results" + ): CVTClass = CVTSampling( input_array, number_of_samples=None, @@ -1980,9 +2255,12 @@ def test__init__creation_08(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_09(self, array_type): + def test__init__creation_tolerance_too_tight(self, array_type): input_array = array_type(self.input_array_list) - with pytest.warns(Warning): + with pytest.warns( + Warning, + match="Tolerance too tight. 
CVT algorithm may take long time to converge.", + ): CVTClass = CVTSampling( input_array, number_of_samples=None, @@ -1992,7 +2270,7 @@ def test__init__creation_09(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_10(self, array_type): + def test__init__creation_valid_tolerance(self, array_type): input_array = array_type(self.input_array_list) CVTClass = CVTSampling( input_array, @@ -2015,63 +2293,91 @@ def test__init__creation_11(self, array_type): np.testing.assert_array_equal(CVTClass.eps, -0.09) @pytest.mark.unit - def test__init__creation_13(self): + def test__init__creation_missing_bounds(self): input_array = [[2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." + ): LHSClass = CVTSampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_14(self): + def test__init__creation_wrong_data_input_format_lb(self): input_array = [np.array([1, 10, 3]), [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." + ): CVTClass = CVTSampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_15(self): + def test__init__creation_wrong_data_input_format_ub(self): input_array = [[1, 10, 3], np.array([2, 11, 4.5])] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." + ): CVTClass = CVTSampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_16(self): + def test__init__creation_unequal_length_list_bounds(self): input_array = [[1, 10], [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." 
+ ): CVTClass = CVTSampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit - def test__init__creation_17(self): + def test__init__creation_equal_input_output_bounds_all(self): input_array = [[2, 11, 4.5], [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises(ValueError, match="Invalid entry: both lists are equal."): CVTClass = CVTSampling( input_array, number_of_samples=None, sampling_type="creation" ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__creation_selection_01(self, array_type): + def test__init__samplingtype_nonstring(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + TypeError, match="Invalid sampling type entry. Must be of type ." + ): CVTClass = CVTSampling( input_array, number_of_samples=None, tolerance=None, sampling_type=1 ) @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__creation_selection_02(self, array_type): + def test__init__samplingtype_undefined_string(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, + match='Invalid sampling type requirement entered. 
Enter "creation" for sampling from a range or "selection" for selecting samples from a dataset.', + ): CVTClass = CVTSampling( input_array, number_of_samples=None, tolerance=None, sampling_type="jp" ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_equal_input_output_bounds_one(self, array_type): + input_array = array_type([[0, 0, 0], [0, 1, 1]]) + with pytest.raises( + ValueError, + match="Invalid entry: at least one variable contains the same value for the lower and upper bounds.", + ): + CVTClass = CVTSampling( + input_array, + number_of_samples=5, + tolerance=None, + sampling_type=None, + ) + @pytest.mark.unit def test_random_sample_selection_01(self): size = (5, 2) @@ -2094,19 +2400,21 @@ def test_random_sample_selection_03(self): assert out_random_points.shape == size @pytest.mark.unit - def test_random_sample_selection_04(self): + def test_random_sample_selection_test_negative(self): size = (5, -1) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="negative dimensions are not allowed"): out_random_points = CVTSampling.random_sample_selection(size[0], size[1]) @pytest.mark.unit - def test_random_sample_selection_05(self): + def test_random_sample_selection_test_float(self): size = (5, 1.1) - with pytest.raises(TypeError): + with pytest.raises( + TypeError, match="'float' object cannot be interpreted as an integer" + ): out_random_points = CVTSampling.random_sample_selection(size[0], size[1]) @pytest.mark.unit - def test_eucl_distance_01(self): + def test_eucl_distance_single_values(self): u = np.array([[3]]) v = np.array([[5]]) expected_output = 2 @@ -2114,7 +2422,7 @@ def test_eucl_distance_01(self): assert expected_output == output @pytest.mark.unit - def test_eucl_distance_02(self): + def test_eucl_distance_1d_arrays(self): u = np.array([[1, 2]]) v = np.array([[3, 4]]) expected_output = 8**0.5 @@ -2122,7 +2430,7 @@ def test_eucl_distance_02(self): assert expected_output == output 
@pytest.mark.unit - def test_eucl_distance_03(self): + def test_eucl_distance_2d_arrays(self): u = np.array([[1, 2], [3, 4]]) v = np.array([[5, 6], [7, 8]]) expected_output = np.array([32**0.5, 32**0.5]) @@ -2130,7 +2438,7 @@ def test_eucl_distance_03(self): np.testing.assert_array_equal(expected_output, output) @pytest.mark.unit - def test_eucl_distance_04(self): + def test_eucl_distance_1d_2d_arrays(self): u = np.array([[1, 2]]) v = np.array([[5, 6], [7, 8]]) expected_output = np.array([32**0.5, 72**0.5]) @@ -2255,7 +2563,7 @@ class TestCustomSampling: @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_01(self, array_type): + def test__init__selection_right_behaviour_with_none_no_samples(self, array_type): input_array = array_type(self.input_array) CSClass = CustomSampling( input_array, @@ -2270,7 +2578,9 @@ def test__init__selection_01(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_02(self, array_type): + def test__init__selection_right_behaviour_with_specified_no_samples( + self, array_type + ): input_array = array_type(self.input_array) CSClass = CustomSampling( input_array, @@ -2285,9 +2595,11 @@ def test__init__selection_02(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_03(self, array_type): + def test__init__selection_zero_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." 
+ ): CSClass = CustomSampling( input_array, number_of_samples=0, @@ -2297,9 +2609,11 @@ def test__init__selection_03(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_04(self, array_type): + def test__init__selection_negative_no_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." + ): CSClass = CustomSampling( input_array, number_of_samples=-1, @@ -2309,9 +2623,12 @@ def test__init__selection_04(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_05(self, array_type): + def test__init__selection_excess_no_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, + match="Sample size cannot be greater than number of samples in the input data set", + ): CSClass = CustomSampling( input_array, number_of_samples=101, @@ -2321,9 +2638,9 @@ def test__init__selection_05(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_06(self, array_type): + def test__init__selection_non_integer_no_samples(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises(TypeError, match="number_of_samples must be an integer."): CSClass = CustomSampling( input_array, number_of_samples=1.1, @@ -2333,9 +2650,12 @@ def test__init__selection_06(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__selection_07(self, array_type): + def test__init__selection_wrong_input_data_type(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match='Pandas dataframe 
or numpy array required for sampling_type "selection."', + ): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2343,20 +2663,16 @@ def test__init__selection_07(self, array_type): list_of_distributions=["uniform", "normal"], ) - @pytest.mark.unit - @pytest.mark.parametrize("array_type", [list]) - def test__init__selection_08(self, array_type): - input_array = array_type(self.input_array_list) - with pytest.raises(ValueError): - CSClass = CustomSampling( - input_array, number_of_samples=None, sampling_type="selection" - ) - @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_09(self, array_type): + def test__init__selection_list_distributions_length_exceeds_inputs( + self, array_type + ): input_array = array_type(self.input_array_list) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match="Length of list_of_distributions must equal the number of variables.", + ): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2366,9 +2682,14 @@ def test__init__selection_09(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_10(self, array_type): + def test__init__selection_list_distributions_length_less_than_inputs( + self, array_type + ): input_array = array_type(self.input_array_list) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match="Length of list_of_distributions must equal the number of variables.", + ): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2378,9 +2699,9 @@ def test__init__selection_10(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_11(self, array_type): + def test__init__selection_empty_distributions(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(ValueError): + with pytest.raises(ValueError, 
match="list_of_distributions cannot be empty."): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2390,9 +2711,11 @@ def test__init__selection_11(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_12(self, array_type): + def test__init__selection_distribution_not_list(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(TypeError): + with pytest.raises( + TypeError, match="Error with list_of_distributions: list required." + ): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2402,9 +2725,9 @@ def test__init__selection_12(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_13(self, array_type): + def test__init__selection_distribution_entry_not_string(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match="All values in list must be strings"): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2414,9 +2737,12 @@ def test__init__selection_13(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__selection_14(self, array_type): + def test__init__selection_distribution_not_available(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match="list_of_distributions only supports 'random', 'normal' and 'uniform' sampling options.", + ): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2426,7 +2752,7 @@ def test__init__selection_14(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_01(self, array_type): + def test__init__creation_right_hahaviour_with_none_samplingtype(self, array_type): input_array = 
array_type(self.input_array_list) CSClass = CustomSampling( input_array, @@ -2440,7 +2766,7 @@ def test__init__creation_01(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_02(self, array_type): + def test__init__creation_right_behaviour_with_none_no_samples(self, array_type): input_array = array_type(self.input_array_list) CSClass = CustomSampling( input_array, @@ -2454,7 +2780,9 @@ def test__init__creation_02(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_03(self, array_type): + def test__init__creation_right_behaviour_with_specified_no_samples( + self, array_type + ): input_array = array_type(self.input_array_list) CSClass = CustomSampling( input_array, @@ -2468,9 +2796,11 @@ def test__init__creation_03(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_04(self, array_type): + def test__init__creation_zero_samples(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." + ): CSClass = CustomSampling( input_array, number_of_samples=0, @@ -2480,9 +2810,11 @@ def test__init__creation_04(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_05(self, array_type): + def test__init__creation_negative_no_samples(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="number_of_samples must a positive, non-zero integer." 
+ ): CSClass = CustomSampling( input_array, number_of_samples=-1, @@ -2492,9 +2824,9 @@ def test__init__creation_05(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_06(self, array_type): + def test__init__creation_non_integer_no_samples(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(Exception): + with pytest.raises(TypeError, match="number_of_samples must be an integer."): CSClass = CustomSampling( input_array, number_of_samples=1.1, @@ -2504,21 +2836,12 @@ def test__init__creation_06(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__creation_07(self, array_type): - input_array = array_type(self.input_array) - with pytest.raises(ValueError): - CSClass = CustomSampling( - input_array, - number_of_samples=None, - sampling_type="creation", - list_of_distributions=["uniform", "normal", "random"], - ) - - @pytest.mark.unit - @pytest.mark.parametrize("array_type", [pd.DataFrame]) - def test__init__creation_08(self, array_type): + def test__init__creation_wrong_input_data_type(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match='List entry of two elements expected for sampling_type "creation."', + ): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2527,9 +2850,11 @@ def test__init__creation_08(self, array_type): ) @pytest.mark.unit - def test__init__creation_09(self): + def test__init__creation_missing_bounds(self): input_array = [[2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." 
+ ): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2538,9 +2863,11 @@ def test__init__creation_09(self): ) @pytest.mark.unit - def test__init__creation_10(self): + def test__init__creation_wrong_data_input_format_lb(self): input_array = [np.array([1, 10, 3]), [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." + ): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2549,9 +2876,11 @@ def test__init__creation_10(self): ) @pytest.mark.unit - def test__init__creation_11(self): + def test__init__creation_wrong_data_input_format_ub(self): input_array = [[1, 10, 3], np.array([2, 11, 4.5])] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." + ): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2560,9 +2889,11 @@ def test__init__creation_11(self): ) @pytest.mark.unit - def test__init__creation_12(self): + def test__init__creation_unequal_length_list_bounds(self): input_array = [[1, 10], [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises( + ValueError, match="data_input must contain two lists of equal lengths." 
+ ): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2571,9 +2902,9 @@ def test__init__creation_12(self): ) @pytest.mark.unit - def test__init__creation_13(self): + def test__init__creation_equal_input_output_bounds_all(self): input_array = [[2, 11, 4.5], [2, 11, 4.5]] - with pytest.raises(Exception): + with pytest.raises(ValueError, match="Invalid entry: both lists are equal."): csClass = CustomSampling( input_array, number_of_samples=None, @@ -2583,9 +2914,14 @@ def test__init__creation_13(self): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_14(self, array_type): + def test__init__creation_list_distributions_length_less_than_inputs( + self, array_type + ): input_array = array_type(self.input_array_list) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match="Length of list_of_distributions must equal the number of variables.", + ): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2595,9 +2931,12 @@ def test__init__creation_14(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__creation_15(self, array_type): + def test__init__creation_list_distributions_length_exceeds_inputs(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match="Length of list_of_distributions must equal the number of variables.", + ): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2607,9 +2946,9 @@ def test__init__creation_15(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__selection_16(self, array_type): + def test__init__selection_empty_distributions(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="list_of_distributions cannot be empty."): CSClass = CustomSampling( input_array, 
number_of_samples=None, @@ -2619,9 +2958,11 @@ def test__init__selection_16(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__selection_17(self, array_type): + def test__init__selection_distribution_not_list(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(TypeError): + with pytest.raises( + TypeError, match="Error with list_of_distributions: list required." + ): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2631,9 +2972,9 @@ def test__init__selection_17(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__selection_18(self, array_type): + def test__init__selection_distribution_entry_not_string(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(TypeError): + with pytest.raises(TypeError, match="All values in list must be strings"): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2643,9 +2984,12 @@ def test__init__selection_18(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test__init__selection_19(self, array_type): + def test__init__selection_distribution_not_available(self, array_type): input_array = array_type(self.input_array_list) - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match="list_of_distributions only supports 'random', 'normal' and 'uniform' sampling options.", + ): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2653,11 +2997,25 @@ def test__init__selection_19(self, array_type): list_of_distributions=["uniform", "gaussian", "normal"], ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_equal_input_output_bounds_one(self, array_type): + input_array = array_type([[0, 0, 0], [0, 1, 1]]) + with pytest.raises(Exception): + CSClass = CustomSampling( + input_array, + number_of_samples=None, + sampling_type=None, + 
list_of_distributions=["uniform", "normal", "random"], + ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__creation_selection_01(self, array_type): + def test__init__samplingtype_nonstring(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + TypeError, match="Invalid sampling type entry. Must be of type ." + ): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2667,9 +3025,12 @@ def test__init__creation_selection_01(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) - def test__init__creation_selection_02(self, array_type): + def test__init__samplingtype_undefined_string(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(Exception): + with pytest.raises( + ValueError, + match='Invalid sampling type requirement entered. Enter "creation" for sampling from a range or "selection" for selecting samples from a dataset.', + ): CSClass = CustomSampling( input_array, number_of_samples=None, @@ -2679,7 +3040,7 @@ def test__init__creation_selection_02(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) - def test_generate_from_dist_01(self, array_type): + def test_generate_from_dist_uniform(self, array_type): for num_samples in [None, 10, 1]: input_array = array_type(self.input_array) CSClass = CustomSampling( @@ -2696,7 +3057,7 @@ def test_generate_from_dist_01(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) - def test_generate_from_dist_02(self, array_type): + def test_generate_from_dist_normal(self, array_type): for num_samples in [None, 10, 1]: input_array = array_type(self.input_array) CSClass = CustomSampling( @@ -2713,7 +3074,7 @@ def test_generate_from_dist_02(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) - def 
test_generate_from_dist_03(self, array_type): + def test_generate_from_dist_random(self, array_type): for num_samples in [None, 10, 1]: input_array = array_type(self.input_array) CSClass = CustomSampling( @@ -2730,7 +3091,7 @@ def test_generate_from_dist_03(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) - def test_generate_from_dist_04(self, array_type): + def test_generate_from_dist_all_types(self, array_type): for num_samples in [None, 10, 1]: input_array = array_type(self.input_array) CSClass = CustomSampling( @@ -2819,7 +3180,7 @@ def test_sample_points_03(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) - def test_sample_points_04(self, array_type): + def test_sample_points_with_list_input_creation_mode(self, array_type): for num_samples in [None, 10, 1]: input_array = array_type(self.input_array_list) CSClass = CustomSampling( @@ -2836,7 +3197,7 @@ def test_sample_points_04(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [pd.DataFrame]) - def test_sample_points_05(self, array_type): + def test_sample_points_with_pandas_dataframe_input_selection_mode(self, array_type): for num_samples in [None, 10, 1]: input_array = array_type(self.full_data) CSClass = CustomSampling( @@ -2852,7 +3213,7 @@ def test_sample_points_05(self, array_type): @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) - def test_sample_points_06(self, array_type): + def test_sample_points_with_numpy_array_input_selection_mode(self, array_type): for num_samples in [None, 10, 1]: input_array = array_type(self.input_array) CSClass = CustomSampling( From 1bd7d3d42ca4908bb35c3142928e90846d42fe04 Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Mon, 11 Dec 2023 10:39:01 -0800 Subject: [PATCH 11/19] Edit Gaussian sampling bounds to allow for strict enforcement --- idaes/core/surrogate/pysmo/sampling.py | 43 ++++++++++++++++++++------ 1 file 
changed, 34 insertions(+), 9 deletions(-) diff --git a/idaes/core/surrogate/pysmo/sampling.py b/idaes/core/surrogate/pysmo/sampling.py index c60f618f42..361fcbf030 100644 --- a/idaes/core/surrogate/pysmo/sampling.py +++ b/idaes/core/surrogate/pysmo/sampling.py @@ -1578,6 +1578,7 @@ def __init__( sampling_type=None, xlabels=None, ylabels=None, + strictly_enforce_gaussian_bounds=False, ): """ Initialization of CustomSampling class. Three inputs are required. @@ -1595,6 +1596,7 @@ def __init__( Keyword Args: xlabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the independent/input variables. Only used in "selection" mode. Default is None. ylabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the dependent/output variables. Only used in "selection" mode. Default is None. + strictly_enforce_gaussian_bounds (bool): Boolean specifying whether the provided bounds for normal distributions should be strictly enforced. Note that selecting this option may affect the underlying distribution. Default is False. Returns: **self** function containing the input information @@ -1713,6 +1715,8 @@ def __init__( ) self.dist_vector = list_of_distributions + self.normal_bounds_enforced = strictly_enforce_gaussian_bounds + def generate_from_dist(self, dist_name): if dist_name.lower() in ["uniform", "random"]: dist = getattr(np.random.default_rng(), dist_name.lower()) @@ -1721,16 +1725,37 @@ def generate_from_dist(self, dist_name): elif dist_name.lower() == "normal": dist = getattr(np.random.default_rng(), "normal") var_values = dist(loc=0.5, scale=1 / 6, size=self.number_of_samples) - if sum( - [1 for i in range(0, var_values.shape[0]) if var_values[i] > 1] - ) + sum([1 for i in range(0, var_values.shape[0]) if var_values[i] < 0]): - warnings.warn( - "Points adjusted to remain within specified Gaussian bounds." 
+ if not self.normal_bounds_enforced: + return dist, np.array(var_values) + else: + if ( + sum([1 for i in range(0, var_values.shape[0]) if var_values[i] > 1]) + + sum( + [1 for i in range(0, var_values.shape[0]) if var_values[i] < 0] + ) + > 0 + ): + warnings.warn( + "Points adjusted to remain within specified Gaussian bounds. This may affect the underlying distribution." + ) + out_locations = [ + i + for i in range(0, var_values.shape[0]) + if var_values[i] > 1 or var_values[i] < 0 + ] + for k in out_locations: + rep_value = var_values[k] + while (rep_value < 0) or (rep_value > 1): + rep_value = dist(loc=0.5, scale=1 / 6, size=1) + var_values[k] = rep_value + assert ( + sum([1 for i in range(0, var_values.shape[0]) if var_values[i] > 1]) + + sum( + [1 for i in range(0, var_values.shape[0]) if var_values[i] < 0] + ) + == 0 ) - var_values_truncated = np.array( - [1.0 if j > 1.0 else 0.0 if j < 0.0 else j for j in var_values] - ) - return dist, var_values_truncated + return dist, np.array(var_values) def sample_points(self): points_spread = [] From 429dcc4686bdbccc06994c75a958fa8b65489993 Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Mon, 11 Dec 2023 11:32:59 -0800 Subject: [PATCH 12/19] Add tests to validate for Gaussian bounds --- .../surrogate/pysmo/tests/test_sampling.py | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/idaes/core/surrogate/pysmo/tests/test_sampling.py b/idaes/core/surrogate/pysmo/tests/test_sampling.py index c112fe7da5..42d7ff6a88 100644 --- a/idaes/core/surrogate/pysmo/tests/test_sampling.py +++ b/idaes/core/surrogate/pysmo/tests/test_sampling.py @@ -2575,6 +2575,7 @@ def test__init__selection_right_behaviour_with_none_no_samples(self, array_type) np.testing.assert_array_equal(CSClass.number_of_samples, 5) np.testing.assert_array_equal(CSClass.x_data, np.array(input_array)[:, :-1]) assert CSClass.dist_vector == ["uniform", "normal"] + assert CSClass.normal_bounds_enforced == 
False @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) @@ -2592,6 +2593,43 @@ def test__init__selection_right_behaviour_with_specified_no_samples( np.testing.assert_array_equal(CSClass.number_of_samples, 6) np.testing.assert_array_equal(CSClass.x_data, np.array(input_array)[:, :-1]) assert CSClass.dist_vector == ["uniform", "normal"] + assert CSClass.normal_bounds_enforced == False + + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_right_behaviour_with_bounds_option_true(self, array_type): + input_array = array_type(self.input_array) + CSClass = CustomSampling( + input_array, + number_of_samples=6, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], + strictly_enforce_gaussian_bounds=True, + ) + np.testing.assert_array_equal(CSClass.data, input_array) + np.testing.assert_array_equal(CSClass.number_of_samples, 6) + np.testing.assert_array_equal(CSClass.x_data, np.array(input_array)[:, :-1]) + assert CSClass.dist_vector == ["uniform", "normal"] + assert CSClass.normal_bounds_enforced == True + + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_right_behaviour_with_bounds_option_false( + self, array_type + ): + input_array = array_type(self.input_array) + CSClass = CustomSampling( + input_array, + number_of_samples=6, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], + strictly_enforce_gaussian_bounds=False, + ) + np.testing.assert_array_equal(CSClass.data, input_array) + np.testing.assert_array_equal(CSClass.number_of_samples, 6) + np.testing.assert_array_equal(CSClass.x_data, np.array(input_array)[:, :-1]) + assert CSClass.dist_vector == ["uniform", "normal"] + assert CSClass.normal_bounds_enforced == False @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) @@ -3009,6 +3047,22 @@ def 
test__init__creation_equal_input_output_bounds_one(self, array_type): list_of_distributions=["uniform", "normal", "random"], ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_nonboolean_bounds_option(self, array_type): + input_array = array_type([[0, 0, 0], [1, 1, 1]]) + with pytest.raises( + TypeError, + match='Invalid "strictly_enforce_gaussian_bounds" entry. Must be boolean.', + ): + CSClass = CustomSampling( + input_array, + number_of_samples=None, + sampling_type=None, + list_of_distributions=["uniform", "normal", "random"], + strictly_enforce_gaussian_bounds=None, + ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__samplingtype_nonstring(self, array_type): @@ -3072,6 +3126,35 @@ def test_generate_from_dist_normal(self, array_type): assert scaled_samples.shape == (CSClass.number_of_samples,) assert dist_res.__name__ == dist_type + @pytest.mark.unit + def test_generate_from_dist_normal_unenforced_gaussian_bounds(self): + CSClass = CustomSampling( + [[0], [1]], + number_of_samples=10000, + sampling_type="creation", + list_of_distributions=["normal"], + ) + dist_type = "normal" + dist_res, scaled_samples = CSClass.generate_from_dist(dist_type) + assert dist_res.__name__ == dist_type + assert scaled_samples.min() < 0 + assert scaled_samples.max() > 1 + + @pytest.mark.unit + def test_generate_from_dist_normal_enforced_gaussian_bounds(self): + CSClass = CustomSampling( + [[0], [1]], + number_of_samples=10000, + sampling_type="creation", + list_of_distributions=["normal"], + strictly_enforce_gaussian_bounds=True, + ) + dist_type = "normal" + dist_res, scaled_samples = CSClass.generate_from_dist(dist_type) + assert dist_res.__name__ == dist_type + assert scaled_samples.min() >= 0 + assert scaled_samples.max() <= 1 + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array]) def test_generate_from_dist_random(self, array_type): From 
d54f59252b6c6145fdd0e25db618e1205a62389a Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Mon, 11 Dec 2023 12:06:25 -0800 Subject: [PATCH 13/19] Update test_sampling.py --- idaes/core/surrogate/pysmo/tests/test_sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/idaes/core/surrogate/pysmo/tests/test_sampling.py b/idaes/core/surrogate/pysmo/tests/test_sampling.py index 42d7ff6a88..2e1fb2334f 100644 --- a/idaes/core/surrogate/pysmo/tests/test_sampling.py +++ b/idaes/core/surrogate/pysmo/tests/test_sampling.py @@ -3060,7 +3060,7 @@ def test__init__creation_nonboolean_bounds_option(self, array_type): number_of_samples=None, sampling_type=None, list_of_distributions=["uniform", "normal", "random"], - strictly_enforce_gaussian_bounds=None, + strictly_enforce_gaussian_bounds='False', ) @pytest.mark.unit From fd9b7d13f17fcb4b7d867cb552e1695695dfbc8d Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Mon, 11 Dec 2023 12:13:48 -0800 Subject: [PATCH 14/19] Update test_sampling.py --- idaes/core/surrogate/pysmo/tests/test_sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/idaes/core/surrogate/pysmo/tests/test_sampling.py b/idaes/core/surrogate/pysmo/tests/test_sampling.py index 2e1fb2334f..9de61c82a4 100644 --- a/idaes/core/surrogate/pysmo/tests/test_sampling.py +++ b/idaes/core/surrogate/pysmo/tests/test_sampling.py @@ -3060,7 +3060,7 @@ def test__init__creation_nonboolean_bounds_option(self, array_type): number_of_samples=None, sampling_type=None, list_of_distributions=["uniform", "normal", "random"], - strictly_enforce_gaussian_bounds='False', + strictly_enforce_gaussian_bounds="False", ) @pytest.mark.unit From dc4de4c5a20019573c006adabc1be646d8630817 Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Mon, 11 Dec 2023 13:15:00 -0800 Subject: [PATCH 15/19] Add missing check in init --- 
idaes/core/surrogate/pysmo/sampling.py | 19 +++++++++++++++++-- .../surrogate/pysmo/tests/test_sampling.py | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/idaes/core/surrogate/pysmo/sampling.py b/idaes/core/surrogate/pysmo/sampling.py index 361fcbf030..f70e39bdb6 100644 --- a/idaes/core/surrogate/pysmo/sampling.py +++ b/idaes/core/surrogate/pysmo/sampling.py @@ -1558,6 +1558,8 @@ class CustomSampling(SamplingMethods): - The distribution to be used per variable needs to be specified in a list. + - Users are urged to visit the documentation for more information about normal distribution-based sampling. + To use: call class with inputs, and then ``sample_points`` function **Example:** @@ -1568,6 +1570,16 @@ class CustomSampling(SamplingMethods): >>> b = rbf.CustomSampling(data, [10, 5], list_of_distributions= ['normal', 'uniform'], sampling_type="selection") >>> samples = b.sample_points() + **Note on Gaussian-based sampling** + + To remain consistent with the other sampling methods and distributions, bounds are required for specifying Gaussian distributions, rather than the mean (:math:`\\bar{x}`) and standard deviation (:math:`\\sigma`). + + Given the mean and standard deviation, the bounds of the distribution may be computed as: + + Lower bound = :math:`\\bar{x} - 3\\sigma` ; Upper bound = :math:`\\bar{x} + 3\\sigma` + + Users should visit the documentation for more information. + """ def __init__( @@ -1581,7 +1593,7 @@ def __init__( strictly_enforce_gaussian_bounds=False, ): """ - Initialization of CustomSampling class. Three inputs are required. + Initialization of CustomSampling class. Four inputs are required. Args: data_input (NumPy Array, Pandas Dataframe or list) : The input data set or range to be sampled. 
@@ -1609,7 +1621,6 @@ def __init__( IndexError: When invalid column names are supplied in **xlabels** or **ylabels** - """ if sampling_type is None: sampling_type = "creation" @@ -1715,6 +1726,10 @@ def __init__( ) self.dist_vector = list_of_distributions + if not isinstance(strictly_enforce_gaussian_bounds, bool): + raise TypeError( + "Invalid 'strictly_enforce_gaussian_bounds' entry. Must be boolean." + ) self.normal_bounds_enforced = strictly_enforce_gaussian_bounds def generate_from_dist(self, dist_name): diff --git a/idaes/core/surrogate/pysmo/tests/test_sampling.py b/idaes/core/surrogate/pysmo/tests/test_sampling.py index 9de61c82a4..0c5fbe6608 100644 --- a/idaes/core/surrogate/pysmo/tests/test_sampling.py +++ b/idaes/core/surrogate/pysmo/tests/test_sampling.py @@ -3053,7 +3053,7 @@ def test__init__creation_nonboolean_bounds_option(self, array_type): input_array = array_type([[0, 0, 0], [1, 1, 1]]) with pytest.raises( TypeError, - match='Invalid "strictly_enforce_gaussian_bounds" entry. Must be boolean.', + match="Invalid 'strictly_enforce_gaussian_bounds' entry. Must be boolean.", ): CSClass = CustomSampling( input_array, From 1e00f62afc7eb992d322e06b8a772f7f0d44f113 Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Mon, 11 Dec 2023 14:32:24 -0800 Subject: [PATCH 16/19] Improve docs on Gaussian distribution samples. 
--- .../surrogate/sampling/pysmo_custom.rst | 24 ++++++++++++++++--- idaes/core/surrogate/pysmo/sampling.py | 14 +++++------ 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/docs/explanations/modeling_extensions/surrogate/sampling/pysmo_custom.rst b/docs/explanations/modeling_extensions/surrogate/sampling/pysmo_custom.rst index 401c7f299e..f5eaf39792 100644 --- a/docs/explanations/modeling_extensions/surrogate/sampling/pysmo_custom.rst +++ b/docs/explanations/modeling_extensions/surrogate/sampling/pysmo_custom.rst @@ -9,9 +9,27 @@ The ``pysmo.sampling.CustomSampling`` method carries out the user-defined sampli We currently support three distributions options for sampling: -* "random", for sampling from a random distribution -* "uniform", for sampling from a uniform distribution -* "normal", for sampling from a Gaussian distribution +* "random", for sampling from a random distribution. +* "uniform", for sampling from a uniform distribution. +* "normal", for sampling from a normal (i.e. Gaussian) distribution. + +.. warning:: + **A note on Gaussian-based sampling** + + To remain consistent with the other sampling methods and distributions, bounds are required for specifying normal distributions, rather than the mean (:math:`\bar{x}`) and standard deviation (:math:`\sigma`). For a normal distribution, 99.7% of the points/samples fall within three standard deviations of the mean. Thus, the bounds of the distribution may be computed as: + + .. math:: + \begin{equation} + LB = \bar{x} - 3\sigma + \end{equation} + + .. math:: + \begin{equation} + UB = \bar{x} + 3\sigma + \end{equation} + + While almost all of the points generated will typically fall between LB and UB, a few points may be generated outside the bounds (as should be expected from a normal distribution). However, users can choose to enforce the bounds as hard constraints by setting the boolean option **strictly_enforce_gaussian_bounds** to True during initialization.
In that case, values exceeding the bounds are replaced by new values generated from the distributions. However, this may affect the underlying distribution. + Available Methods ------------------ diff --git a/idaes/core/surrogate/pysmo/sampling.py b/idaes/core/surrogate/pysmo/sampling.py index f70e39bdb6..f3904a2d42 100644 --- a/idaes/core/surrogate/pysmo/sampling.py +++ b/idaes/core/surrogate/pysmo/sampling.py @@ -1566,19 +1566,19 @@ class CustomSampling(SamplingMethods): .. code-block:: python - # To select 50 samples on a (10 x 5) grid in a 2D space: - >>> b = rbf.CustomSampling(data, [10, 5], list_of_distributions= ['normal', 'uniform'], sampling_type="selection") + # To select 50 samples from a dataset: + >>> b = rbf.CustomSampling(data, 50, list_of_distributions= ['normal', 'uniform'], sampling_type="selection") >>> samples = b.sample_points() - **Note on Gaussian-based sampling** + Note: - To remain consistent with the other sampling methods and distributions, bounds are required for specifying Gaussian distributions, rather than the mean (:math:`\\bar{x}`) and standard deviation (:math:`\\sigma`). + To remain consistent with the other sampling methods and distributions, **bounds are required for specifying normal distributions, rather than the mean and standard deviation**. - Given the mean and standard deviation, the bounds of the distribution may be computed as: + Given the mean (:math:`\\bar{x}`) and standard deviation (:math:`\\sigma`), the bounds of the normal distribution may be computed as: - Lower bound = :math:`\\bar{x} - 3\\sigma` ; Upper bound = :math:`\\bar{x} + 3\\sigma` + Lower bound = :math:`\\bar{x} - 3\\sigma` ; Upper bound = :math:`\\bar{x} + 3\\sigma` - Users should visit the documentation for more information. + Users should visit the documentation for more information.
""" From 9f36da432ea8872a0d37621f6357d77950ed291d Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Mon, 11 Dec 2023 15:30:11 -0800 Subject: [PATCH 17/19] Update test_sampling.py --- idaes/core/surrogate/pysmo/tests/test_sampling.py | 1 + 1 file changed, 1 insertion(+) diff --git a/idaes/core/surrogate/pysmo/tests/test_sampling.py b/idaes/core/surrogate/pysmo/tests/test_sampling.py index 0c5fbe6608..67f2daf957 100644 --- a/idaes/core/surrogate/pysmo/tests/test_sampling.py +++ b/idaes/core/surrogate/pysmo/tests/test_sampling.py @@ -3223,6 +3223,7 @@ def test_sample_points_02(self, array_type): number_of_samples=num_samples, sampling_type="creation", list_of_distributions=["random", "normal", "uniform"], + strictly_enforce_gaussian_bounds=True, ) unique_sample_points = CSClass.sample_points() input_array = np.array(input_array) From 3af1da542363c6bd43b26ec7133f1dafc782331b Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Mon, 18 Dec 2023 14:51:50 -0800 Subject: [PATCH 18/19] Add random seed specification option --- idaes/core/surrogate/pysmo/sampling.py | 27 ++- .../surrogate/pysmo/tests/test_sampling.py | 226 ++++++++++++++++++ 2 files changed, 251 insertions(+), 2 deletions(-) diff --git a/idaes/core/surrogate/pysmo/sampling.py b/idaes/core/surrogate/pysmo/sampling.py index f3904a2d42..e2851cbe61 100644 --- a/idaes/core/surrogate/pysmo/sampling.py +++ b/idaes/core/surrogate/pysmo/sampling.py @@ -480,6 +480,7 @@ def __init__( sampling_type=None, xlabels=None, ylabels=None, + rand_seed=None, ): """ Initialization of **LatinHypercubeSampling** class. Two inputs are required. @@ -496,6 +497,7 @@ def __init__( Keyword Args: xlabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the independent/input variables. Only used in "selection" mode. Default is None. 
ylabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the dependent/output variables. Only used in "selection" mode. Default is None. + rand_seed (int): Option that allows users to fix the numpy random seed generator for reproducibility (if required). Returns: **self** function containing the input information @@ -594,6 +596,12 @@ def __init__( self.number_of_samples = number_of_samples self.x_data = bounds_array # Only x data will be present in this case + if rand_seed is not None: + if not isinstance(rand_seed, int): + raise TypeError("Random seed must be an integer.") + self.seed_value = rand_seed + np.random.seed(self.seed_value) + def variable_sample_creation(self, variable_min, variable_max): """ @@ -1269,6 +1277,7 @@ def __init__( sampling_type=None, xlabels=None, ylabels=None, + rand_seed=None, ): """ Initialization of CVTSampling class. Two inputs are required, while an optional option to control the solution accuracy may be specified. @@ -1285,6 +1294,7 @@ def __init__( Keyword Args: xlabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the independent/input variables. Only used in "selection" mode. Default is None. ylabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the dependent/output variables. Only used in "selection" mode. Default is None. + rand_seed (int): Option that allows users to fix the numpy random seed generator for reproducibility (if required). tolerance(float): Maximum allowable Euclidean distance between centres from consecutive iterations of the algorithm. Termination condition for algorithm. - The smaller the value of tolerance, the better the solution but the longer the algorithm requires to converge. Default value is :math:`10^{-7}`. 
@@ -1412,6 +1422,12 @@ def __init__( raise Exception("Invalid tolerance input") self.eps = tolerance + if rand_seed is not None: + if not isinstance(rand_seed, int): + raise TypeError("Random seed must be an integer.") + self.seed_value = rand_seed + np.random.seed(self.seed_value) + @staticmethod def random_sample_selection(no_samples, no_features): """ @@ -1591,6 +1607,7 @@ def __init__( xlabels=None, ylabels=None, strictly_enforce_gaussian_bounds=False, + rand_seed=None, ): """ Initialization of CustomSampling class. Four inputs are required. @@ -1608,6 +1625,7 @@ def __init__( Keyword Args: xlabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the independent/input variables. Only used in "selection" mode. Default is None. ylabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the dependent/output variables. Only used in "selection" mode. Default is None. + rand_seed (int): Option that allows users to fix the numpy random seed generator for reproducibility (if required). strictly_enforce_gaussian_bounds (bool): Boolean specifying whether the provided bounds for normal distributions should be strictly enforced. Note that selecting this option may affect the underlying distribution. Default is False. 
Returns: @@ -1732,13 +1750,18 @@ def __init__( ) self.normal_bounds_enforced = strictly_enforce_gaussian_bounds + if rand_seed is not None: + if not isinstance(rand_seed, int): + raise TypeError("Random seed must be an integer.") + self.seed_value = rand_seed + def generate_from_dist(self, dist_name): if dist_name.lower() in ["uniform", "random"]: - dist = getattr(np.random.default_rng(), dist_name.lower()) + dist = getattr(np.random.default_rng(self.seed_value), dist_name.lower()) var_values = np.array(dist(size=self.number_of_samples)) return dist, var_values elif dist_name.lower() == "normal": - dist = getattr(np.random.default_rng(), "normal") + dist = getattr(np.random.default_rng(self.seed_value), "normal") var_values = dist(loc=0.5, scale=1 / 6, size=self.number_of_samples) if not self.normal_bounds_enforced: return dist, np.array(var_values) diff --git a/idaes/core/surrogate/pysmo/tests/test_sampling.py b/idaes/core/surrogate/pysmo/tests/test_sampling.py index 67f2daf957..1996c53289 100644 --- a/idaes/core/surrogate/pysmo/tests/test_sampling.py +++ b/idaes/core/surrogate/pysmo/tests/test_sampling.py @@ -492,6 +492,24 @@ def test__init__selection_right_behaviour_with_specified_no_samples( np.testing.assert_array_equal(LHSClass.number_of_samples, 6) np.testing.assert_array_equal(LHSClass.x_data, np.array(input_array)[:, :-1]) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_right_behaviour_with_specified_random_seed( + self, array_type + ): + input_array = array_type(self.input_array) + rand_seed = 100 + LHSClass = LatinHypercubeSampling( + input_array, + number_of_samples=6, + sampling_type="selection", + rand_seed=rand_seed, + ) + np.testing.assert_array_equal(LHSClass.data, input_array) + np.testing.assert_array_equal(LHSClass.number_of_samples, 6) + np.testing.assert_array_equal(LHSClass.x_data, np.array(input_array)[:, :-1]) + assert LHSClass.seed_value == rand_seed + @pytest.mark.unit 
@pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_zero_samples(self, array_type): @@ -547,6 +565,18 @@ def test__init__selection_wrong_input_data_type(self, array_type): input_array, number_of_samples=None, sampling_type="selection" ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_non_integer_random_seed(self, array_type): + input_array = array_type(self.input_array) + with pytest.raises(TypeError, match="Random seed must be an integer."): + LHSClass = LatinHypercubeSampling( + input_array, + number_of_samples=5, + sampling_type="selection", + rand_seed=1.2, + ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_right_behaviour_with_none_samplingtype(self, array_type): @@ -579,6 +609,21 @@ def test__init__creation_right_behaviour_with_specified_no_samples( np.testing.assert_array_equal(LHSClass.data, input_array) np.testing.assert_array_equal(LHSClass.number_of_samples, 100) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_right_behaviour_with_specified_seed(self, array_type): + input_array = array_type(self.input_array_list) + rand_seed = 50 + LHSClass = LatinHypercubeSampling( + input_array, + number_of_samples=100, + sampling_type="creation", + rand_seed=rand_seed, + ) + np.testing.assert_array_equal(LHSClass.data, input_array) + np.testing.assert_array_equal(LHSClass.number_of_samples, 100) + assert LHSClass.seed_value == rand_seed + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_zero_samples(self, array_type): @@ -844,6 +889,33 @@ def test_sample_points_03(self, array_type): ) np.testing.assert_array_equal(expected_testing, out_testing) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test_sample_points_equality_fixed_seed(self, array_type): + rand_seed = 1000 + for num_samples in [None, 1, 10, 
100]: # Test for different number of samples + input_array = array_type(self.input_array_list) + + LHSClass_A = LatinHypercubeSampling( + input_array, + number_of_samples=num_samples, + sampling_type="creation", + rand_seed=rand_seed, + ) + unique_sample_points_A = LHSClass_A.sample_points() + + LHSClass_B = LatinHypercubeSampling( + input_array, + number_of_samples=num_samples, + sampling_type="creation", + rand_seed=rand_seed, + ) + unique_sample_points_B = LHSClass_B.sample_points() + + np.testing.assert_array_equal( + unique_sample_points_A, unique_sample_points_B + ) + class TestUniformSampling: input_array = [[x, x + 10, (x + 1) ** 2 + x + 10] for x in range(10)] @@ -2013,6 +2085,26 @@ def test__init__selection_right_behaviour_with_specified_no_samples( np.testing.assert_array_equal(CVTClass.x_data, np.array(input_array)[:, :-1]) np.testing.assert_array_equal(CVTClass.eps, 1e-7) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_right_behaviour_with_specified_random_seed( + self, array_type + ): + input_array = array_type(self.input_array) + rand_seed = 100 + CVTClass = CVTSampling( + input_array, + number_of_samples=6, + tolerance=None, + sampling_type="selection", + rand_seed=rand_seed, + ) + np.testing.assert_array_equal(CVTClass.data, input_array) + np.testing.assert_array_equal(CVTClass.number_of_centres, 6) + np.testing.assert_array_equal(CVTClass.x_data, np.array(input_array)[:, :-1]) + np.testing.assert_array_equal(CVTClass.eps, 1e-7) + assert CVTClass.seed_value == rand_seed + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_zero_samples(self, array_type): @@ -2112,6 +2204,19 @@ def test__init__selection_tolerance_too_tight(self, array_type): sampling_type="selection", ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_non_integer_random_seed(self, array_type): + 
input_array = array_type(self.input_array) + with pytest.raises(TypeError, match="Random seed must be an integer."): + CVTClass = CVTSampling( + input_array, + number_of_samples=5, + sampling_type="selection", + rand_seed=1.2, + tolerance=None, + ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_valid_tolerance(self, array_type): @@ -2184,6 +2289,22 @@ def test__init__creation_right_behaviour_with_specified_no_samples( np.testing.assert_array_equal(CVTClass.data, input_array) np.testing.assert_array_equal(CVTClass.number_of_centres, 100) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_right_behaviour_with_specified_seed(self, array_type): + input_array = array_type(self.input_array_list) + rand_seed = 50 + CVTClass = CVTSampling( + input_array, + number_of_samples=100, + tolerance=None, + sampling_type="creation", + rand_seed=rand_seed, + ) + np.testing.assert_array_equal(CVTClass.data, input_array) + np.testing.assert_array_equal(CVTClass.number_of_centres, 100) + assert CVTClass.seed_value == rand_seed + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_zero_samples(self, array_type): @@ -2548,6 +2669,33 @@ def test_sample_points_02(self, array_type): unique_sample_points.shape, ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test_sample_points_equality_fixed_seed(self, array_type): + rand_seed = 1000 + for num_samples in [None, 1, 10, 100]: # Test for different number of samples + input_array = array_type(self.input_array_list) + + CVTClass_A = CVTSampling( + input_array, + number_of_samples=num_samples, + sampling_type="creation", + rand_seed=rand_seed, + ) + unique_sample_points_A = CVTClass_A.sample_points() + + CVTClass_B = CVTSampling( + input_array, + number_of_samples=num_samples, + sampling_type="creation", + rand_seed=rand_seed, + ) + unique_sample_points_B = 
CVTClass_B.sample_points() + + np.testing.assert_array_equal( + unique_sample_points_A, unique_sample_points_B + ) + class TestCustomSampling: input_array = [[x, x + 10, (x + 1) ** 2 + x + 10] for x in range(10)] @@ -2631,6 +2779,26 @@ def test__init__selection_right_behaviour_with_bounds_option_false( assert CSClass.dist_vector == ["uniform", "normal"] assert CSClass.normal_bounds_enforced == False + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_right_behaviour_with_specified_random_seed( + self, array_type + ): + input_array = array_type(self.input_array) + rand_seed = 1000 + CSClass = CustomSampling( + input_array, + number_of_samples=6, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], + rand_seed=rand_seed, + ) + np.testing.assert_array_equal(CSClass.data, input_array) + np.testing.assert_array_equal(CSClass.number_of_samples, 6) + np.testing.assert_array_equal(CSClass.x_data, np.array(input_array)[:, :-1]) + assert CSClass.dist_vector == ["uniform", "normal"] + assert CSClass.seed_value == rand_seed + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_zero_samples(self, array_type): @@ -2788,6 +2956,19 @@ def test__init__selection_distribution_not_available(self, array_type): list_of_distributions=["uniform", "binomial"], ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_non_integer_random_seed(self, array_type): + input_array = array_type(self.input_array) + with pytest.raises(TypeError, match="Random seed must be an integer."): + CSClass = CustomSampling( + input_array, + number_of_samples=5, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], + rand_seed=1.2, + ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_right_hahaviour_with_none_samplingtype(self, array_type): @@ 
-2832,6 +3013,23 @@ def test__init__creation_right_behaviour_with_specified_no_samples( np.testing.assert_array_equal(CSClass.number_of_samples, 100) assert CSClass.dist_vector == ["uniform", "normal", "random"] + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_right_behaviour_with_specified_seed(self, array_type): + input_array = array_type(self.input_array_list) + rand_seed = 50 + CSClass = CustomSampling( + input_array, + number_of_samples=100, + sampling_type="creation", + list_of_distributions=["uniform", "normal", "random"], + rand_seed=rand_seed, + ) + np.testing.assert_array_equal(CSClass.data, input_array) + np.testing.assert_array_equal(CSClass.number_of_samples, 100) + assert CSClass.dist_vector == ["uniform", "normal", "random"] + assert CSClass.seed_value == rand_seed + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_zero_samples(self, array_type): @@ -3311,6 +3509,34 @@ def test_sample_points_with_numpy_array_input_selection_mode(self, array_type): assert unique_sample_points.shape[1] == input_array.shape[1] assert type(unique_sample_points) == np.ndarray + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test_sample_points_equality_fixed_seed(self, array_type): + rand_seed = 1000 + for num_samples in [None, 1, 10, 100]: # Test for different number of samples + input_array = array_type(self.input_array_list) + CSClass_A = CustomSampling( + input_array, + number_of_samples=num_samples, + sampling_type="creation", + list_of_distributions=["random", "normal", "uniform"], + rand_seed=rand_seed, + ) + unique_sample_points_A = CSClass_A.sample_points() + + CSClass_B = CustomSampling( + input_array, + number_of_samples=num_samples, + sampling_type="creation", + list_of_distributions=["random", "normal", "uniform"], + rand_seed=rand_seed, + ) + unique_sample_points_B = CSClass_B.sample_points() + + np.testing.assert_array_equal( + 
unique_sample_points_A, unique_sample_points_B + ) + if __name__ == "__main__": pytest.main() From e2fa3c69d297678742fe46cbe1b1cc24c8dd435a Mon Sep 17 00:00:00 2001 From: OOAmusat <47539353+OOAmusat@users.noreply.github.com> Date: Tue, 19 Dec 2023 08:11:08 -0800 Subject: [PATCH 19/19] Switch seed check to try-except --- idaes/core/surrogate/pysmo/sampling.py | 27 ++++--- .../surrogate/pysmo/tests/test_sampling.py | 70 +++++++++++++++++-- 2 files changed, 80 insertions(+), 17 deletions(-) diff --git a/idaes/core/surrogate/pysmo/sampling.py b/idaes/core/surrogate/pysmo/sampling.py index e2851cbe61..becd8d9ca6 100644 --- a/idaes/core/surrogate/pysmo/sampling.py +++ b/idaes/core/surrogate/pysmo/sampling.py @@ -597,10 +597,11 @@ def __init__( self.x_data = bounds_array # Only x data will be present in this case if rand_seed is not None: - if not isinstance(rand_seed, int): - raise TypeError("Random seed must be an integer.") - self.seed_value = rand_seed - np.random.seed(self.seed_value) + try: + self.seed_value = int(rand_seed) + np.random.seed(self.seed_value) + except ValueError: + raise ValueError("Random seed must be an integer.") def variable_sample_creation(self, variable_min, variable_max): """ @@ -1423,10 +1424,11 @@ def __init__( self.eps = tolerance if rand_seed is not None: - if not isinstance(rand_seed, int): - raise TypeError("Random seed must be an integer.") - self.seed_value = rand_seed - np.random.seed(self.seed_value) + try: + self.seed_value = int(rand_seed) + np.random.seed(self.seed_value) + except ValueError: + raise ValueError("Random seed must be an integer.") @staticmethod def random_sample_selection(no_samples, no_features): @@ -1751,9 +1753,12 @@ def __init__( self.normal_bounds_enforced = strictly_enforce_gaussian_bounds if rand_seed is not None: - if not isinstance(rand_seed, int): - raise TypeError("Random seed must be an integer.") - self.seed_value = rand_seed + try: + self.seed_value = int(rand_seed) + except ValueError: + raise 
ValueError("Random seed must be an integer.") + else: + self.seed_value = rand_seed def generate_from_dist(self, dist_name): if dist_name.lower() in ["uniform", "random"]: diff --git a/idaes/core/surrogate/pysmo/tests/test_sampling.py b/idaes/core/surrogate/pysmo/tests/test_sampling.py index 1996c53289..c104ea8d6e 100644 --- a/idaes/core/surrogate/pysmo/tests/test_sampling.py +++ b/idaes/core/surrogate/pysmo/tests/test_sampling.py @@ -510,6 +510,24 @@ def test__init__selection_right_behaviour_with_specified_random_seed( np.testing.assert_array_equal(LHSClass.x_data, np.array(input_array)[:, :-1]) assert LHSClass.seed_value == rand_seed + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_right_behaviour_with_specified_float_random_seed( + self, array_type + ): + input_array = array_type(self.input_array) + rand_seed = 15.1 + LHSClass = LatinHypercubeSampling( + input_array, + number_of_samples=6, + sampling_type="selection", + rand_seed=rand_seed, + ) + np.testing.assert_array_equal(LHSClass.data, input_array) + np.testing.assert_array_equal(LHSClass.number_of_samples, 6) + np.testing.assert_array_equal(LHSClass.x_data, np.array(input_array)[:, :-1]) + assert LHSClass.seed_value == int(rand_seed) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_zero_samples(self, array_type): @@ -569,12 +587,12 @@ def test__init__selection_wrong_input_data_type(self, array_type): @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_non_integer_random_seed(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(TypeError, match="Random seed must be an integer."): + with pytest.raises(ValueError, match="Random seed must be an integer."): LHSClass = LatinHypercubeSampling( input_array, number_of_samples=5, sampling_type="selection", - rand_seed=1.2, + rand_seed="1.2", ) @pytest.mark.unit @@ -2105,6 
+2123,26 @@ def test__init__selection_right_behaviour_with_specified_random_seed( np.testing.assert_array_equal(CVTClass.eps, 1e-7) assert CVTClass.seed_value == rand_seed + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_right_behaviour_with_specified_float_random_seed( + self, array_type + ): + input_array = array_type(self.input_array) + rand_seed = 2.2 + CVTClass = CVTSampling( + input_array, + number_of_samples=6, + tolerance=None, + sampling_type="selection", + rand_seed=rand_seed, + ) + np.testing.assert_array_equal(CVTClass.data, input_array) + np.testing.assert_array_equal(CVTClass.number_of_centres, 6) + np.testing.assert_array_equal(CVTClass.x_data, np.array(input_array)[:, :-1]) + np.testing.assert_array_equal(CVTClass.eps, 1e-7) + assert CVTClass.seed_value == int(rand_seed) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_zero_samples(self, array_type): @@ -2208,12 +2246,12 @@ def test__init__selection_tolerance_too_tight(self, array_type): @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_non_integer_random_seed(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(TypeError, match="Random seed must be an integer."): + with pytest.raises(ValueError, match="Random seed must be an integer."): CVTClass = CVTSampling( input_array, number_of_samples=5, sampling_type="selection", - rand_seed=1.2, + rand_seed="1.2", tolerance=None, ) @@ -2799,6 +2837,26 @@ def test__init__selection_right_behaviour_with_specified_random_seed( assert CSClass.dist_vector == ["uniform", "normal"] assert CSClass.seed_value == rand_seed + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_right_behaviour_with_specified_float_random_seed( + self, array_type + ): + input_array = array_type(self.input_array) + rand_seed = 1.2 + 
CSClass = CustomSampling( + input_array, + number_of_samples=6, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], + rand_seed=rand_seed, + ) + np.testing.assert_array_equal(CSClass.data, input_array) + np.testing.assert_array_equal(CSClass.number_of_samples, 6) + np.testing.assert_array_equal(CSClass.x_data, np.array(input_array)[:, :-1]) + assert CSClass.dist_vector == ["uniform", "normal"] + assert CSClass.seed_value == int(rand_seed) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_zero_samples(self, array_type): @@ -2960,13 +3018,13 @@ def test__init__selection_distribution_not_available(self, array_type): @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_non_integer_random_seed(self, array_type): input_array = array_type(self.input_array) - with pytest.raises(TypeError, match="Random seed must be an integer."): + with pytest.raises(ValueError, match="Random seed must be an integer."): CSClass = CustomSampling( input_array, number_of_samples=5, sampling_type="selection", list_of_distributions=["uniform", "normal"], - rand_seed=1.2, + rand_seed="1.2", ) @pytest.mark.unit