diff --git a/idaes/core/surrogate/pysmo/sampling.py b/idaes/core/surrogate/pysmo/sampling.py
index f3904a2d42..becd8d9ca6 100644
--- a/idaes/core/surrogate/pysmo/sampling.py
+++ b/idaes/core/surrogate/pysmo/sampling.py
@@ -480,6 +480,7 @@ def __init__(
         sampling_type=None,
         xlabels=None,
         ylabels=None,
+        rand_seed=None,
     ):
         """
         Initialization of **LatinHypercubeSampling** class. Two inputs are required.
@@ -496,6 +497,7 @@ def __init__(
         Keyword Args:
             xlabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the independent/input variables. Only used in "selection" mode. Default is None.
             ylabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the dependent/output variables. Only used in "selection" mode. Default is None.
+            rand_seed (int): Seed applied to NumPy's global random state (``np.random.seed``) for reproducibility. Any value accepted by ``int()`` is allowed (floats are truncated). Default is None (no seeding).
 
         Returns:
             **self** function containing the input information
@@ -594,6 +596,13 @@ def __init__(
             self.number_of_samples = number_of_samples
             self.x_data = bounds_array  # Only x data will be present in this case
 
+        try:
+            self.seed_value = None if rand_seed is None else int(rand_seed)
+        except (TypeError, ValueError, OverflowError):
+            raise ValueError("Random seed must be an integer.") from None
+        if self.seed_value is not None:
+            np.random.seed(self.seed_value)  # outside the try: keep numpy's own range error
+
     def variable_sample_creation(self, variable_min, variable_max):
         """
 
@@ -1269,6 +1278,7 @@ def __init__(
         sampling_type=None,
         xlabels=None,
         ylabels=None,
+        rand_seed=None,
     ):
         """
         Initialization of CVTSampling class. Two inputs are required, while an optional option to control the solution accuracy may be specified.
@@ -1285,6 +1295,7 @@ def __init__(
         Keyword Args:
             xlabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the independent/input variables. Only used in "selection" mode. Default is None.
ylabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the dependent/output variables. Only used in "selection" mode. Default is None. + rand_seed (int): Option that allows users to fix the numpy random seed generator for reproducibility (if required). tolerance(float): Maximum allowable Euclidean distance between centres from consecutive iterations of the algorithm. Termination condition for algorithm. - The smaller the value of tolerance, the better the solution but the longer the algorithm requires to converge. Default value is :math:`10^{-7}`. @@ -1412,6 +1423,13 @@ def __init__( raise Exception("Invalid tolerance input") self.eps = tolerance + if rand_seed is not None: + try: + self.seed_value = int(rand_seed) + np.random.seed(self.seed_value) + except ValueError: + raise ValueError("Random seed must be an integer.") + @staticmethod def random_sample_selection(no_samples, no_features): """ @@ -1591,6 +1609,7 @@ def __init__( xlabels=None, ylabels=None, strictly_enforce_gaussian_bounds=False, + rand_seed=None, ): """ Initialization of CustomSampling class. Four inputs are required. @@ -1608,6 +1627,7 @@ def __init__( Keyword Args: xlabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the independent/input variables. Only used in "selection" mode. Default is None. ylabels (list): List of column names (if **data_input** is a dataframe) or column numbers (if **data_input** is an array) for the dependent/output variables. Only used in "selection" mode. Default is None. + rand_seed (int): Option that allows users to fix the numpy random seed generator for reproducibility (if required). strictly_enforce_gaussian_bounds (bool): Boolean specifying whether the provided bounds for normal distributions should be strictly enforced. Note that selecting this option may affect the underlying distribution. Default is False. 
Returns: @@ -1732,13 +1752,21 @@ def __init__( ) self.normal_bounds_enforced = strictly_enforce_gaussian_bounds + if rand_seed is not None: + try: + self.seed_value = int(rand_seed) + except ValueError: + raise ValueError("Random seed must be an integer.") + else: + self.seed_value = rand_seed + def generate_from_dist(self, dist_name): if dist_name.lower() in ["uniform", "random"]: - dist = getattr(np.random.default_rng(), dist_name.lower()) + dist = getattr(np.random.default_rng(self.seed_value), dist_name.lower()) var_values = np.array(dist(size=self.number_of_samples)) return dist, var_values elif dist_name.lower() == "normal": - dist = getattr(np.random.default_rng(), "normal") + dist = getattr(np.random.default_rng(self.seed_value), "normal") var_values = dist(loc=0.5, scale=1 / 6, size=self.number_of_samples) if not self.normal_bounds_enforced: return dist, np.array(var_values) diff --git a/idaes/core/surrogate/pysmo/tests/test_sampling.py b/idaes/core/surrogate/pysmo/tests/test_sampling.py index 67f2daf957..c104ea8d6e 100644 --- a/idaes/core/surrogate/pysmo/tests/test_sampling.py +++ b/idaes/core/surrogate/pysmo/tests/test_sampling.py @@ -492,6 +492,42 @@ def test__init__selection_right_behaviour_with_specified_no_samples( np.testing.assert_array_equal(LHSClass.number_of_samples, 6) np.testing.assert_array_equal(LHSClass.x_data, np.array(input_array)[:, :-1]) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_right_behaviour_with_specified_random_seed( + self, array_type + ): + input_array = array_type(self.input_array) + rand_seed = 100 + LHSClass = LatinHypercubeSampling( + input_array, + number_of_samples=6, + sampling_type="selection", + rand_seed=rand_seed, + ) + np.testing.assert_array_equal(LHSClass.data, input_array) + np.testing.assert_array_equal(LHSClass.number_of_samples, 6) + np.testing.assert_array_equal(LHSClass.x_data, np.array(input_array)[:, :-1]) + assert LHSClass.seed_value 
== rand_seed + + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_right_behaviour_with_specified_float_random_seed( + self, array_type + ): + input_array = array_type(self.input_array) + rand_seed = 15.1 + LHSClass = LatinHypercubeSampling( + input_array, + number_of_samples=6, + sampling_type="selection", + rand_seed=rand_seed, + ) + np.testing.assert_array_equal(LHSClass.data, input_array) + np.testing.assert_array_equal(LHSClass.number_of_samples, 6) + np.testing.assert_array_equal(LHSClass.x_data, np.array(input_array)[:, :-1]) + assert LHSClass.seed_value == int(rand_seed) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_zero_samples(self, array_type): @@ -547,6 +583,18 @@ def test__init__selection_wrong_input_data_type(self, array_type): input_array, number_of_samples=None, sampling_type="selection" ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_non_integer_random_seed(self, array_type): + input_array = array_type(self.input_array) + with pytest.raises(ValueError, match="Random seed must be an integer."): + LHSClass = LatinHypercubeSampling( + input_array, + number_of_samples=5, + sampling_type="selection", + rand_seed="1.2", + ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_right_behaviour_with_none_samplingtype(self, array_type): @@ -579,6 +627,21 @@ def test__init__creation_right_behaviour_with_specified_no_samples( np.testing.assert_array_equal(LHSClass.data, input_array) np.testing.assert_array_equal(LHSClass.number_of_samples, 100) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_right_behaviour_with_specified_seed(self, array_type): + input_array = array_type(self.input_array_list) + rand_seed = 50 + LHSClass = LatinHypercubeSampling( + input_array, + 
number_of_samples=100, + sampling_type="creation", + rand_seed=rand_seed, + ) + np.testing.assert_array_equal(LHSClass.data, input_array) + np.testing.assert_array_equal(LHSClass.number_of_samples, 100) + assert LHSClass.seed_value == rand_seed + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_zero_samples(self, array_type): @@ -844,6 +907,33 @@ def test_sample_points_03(self, array_type): ) np.testing.assert_array_equal(expected_testing, out_testing) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test_sample_points_equality_fixed_seed(self, array_type): + rand_seed = 1000 + for num_samples in [None, 1, 10, 100]: # Test for different number of samples + input_array = array_type(self.input_array_list) + + LHSClass_A = LatinHypercubeSampling( + input_array, + number_of_samples=num_samples, + sampling_type="creation", + rand_seed=rand_seed, + ) + unique_sample_points_A = LHSClass_A.sample_points() + + LHSClass_B = LatinHypercubeSampling( + input_array, + number_of_samples=num_samples, + sampling_type="creation", + rand_seed=rand_seed, + ) + unique_sample_points_B = LHSClass_B.sample_points() + + np.testing.assert_array_equal( + unique_sample_points_A, unique_sample_points_B + ) + class TestUniformSampling: input_array = [[x, x + 10, (x + 1) ** 2 + x + 10] for x in range(10)] @@ -2013,6 +2103,46 @@ def test__init__selection_right_behaviour_with_specified_no_samples( np.testing.assert_array_equal(CVTClass.x_data, np.array(input_array)[:, :-1]) np.testing.assert_array_equal(CVTClass.eps, 1e-7) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_right_behaviour_with_specified_random_seed( + self, array_type + ): + input_array = array_type(self.input_array) + rand_seed = 100 + CVTClass = CVTSampling( + input_array, + number_of_samples=6, + tolerance=None, + sampling_type="selection", + rand_seed=rand_seed, + ) + 
np.testing.assert_array_equal(CVTClass.data, input_array) + np.testing.assert_array_equal(CVTClass.number_of_centres, 6) + np.testing.assert_array_equal(CVTClass.x_data, np.array(input_array)[:, :-1]) + np.testing.assert_array_equal(CVTClass.eps, 1e-7) + assert CVTClass.seed_value == rand_seed + + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_right_behaviour_with_specified_float_random_seed( + self, array_type + ): + input_array = array_type(self.input_array) + rand_seed = 2.2 + CVTClass = CVTSampling( + input_array, + number_of_samples=6, + tolerance=None, + sampling_type="selection", + rand_seed=rand_seed, + ) + np.testing.assert_array_equal(CVTClass.data, input_array) + np.testing.assert_array_equal(CVTClass.number_of_centres, 6) + np.testing.assert_array_equal(CVTClass.x_data, np.array(input_array)[:, :-1]) + np.testing.assert_array_equal(CVTClass.eps, 1e-7) + assert CVTClass.seed_value == int(rand_seed) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_zero_samples(self, array_type): @@ -2112,6 +2242,19 @@ def test__init__selection_tolerance_too_tight(self, array_type): sampling_type="selection", ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_non_integer_random_seed(self, array_type): + input_array = array_type(self.input_array) + with pytest.raises(ValueError, match="Random seed must be an integer."): + CVTClass = CVTSampling( + input_array, + number_of_samples=5, + sampling_type="selection", + rand_seed="1.2", + tolerance=None, + ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_valid_tolerance(self, array_type): @@ -2184,6 +2327,22 @@ def test__init__creation_right_behaviour_with_specified_no_samples( np.testing.assert_array_equal(CVTClass.data, input_array) 
np.testing.assert_array_equal(CVTClass.number_of_centres, 100) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_right_behaviour_with_specified_seed(self, array_type): + input_array = array_type(self.input_array_list) + rand_seed = 50 + CVTClass = CVTSampling( + input_array, + number_of_samples=100, + tolerance=None, + sampling_type="creation", + rand_seed=rand_seed, + ) + np.testing.assert_array_equal(CVTClass.data, input_array) + np.testing.assert_array_equal(CVTClass.number_of_centres, 100) + assert CVTClass.seed_value == rand_seed + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_zero_samples(self, array_type): @@ -2548,6 +2707,33 @@ def test_sample_points_02(self, array_type): unique_sample_points.shape, ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test_sample_points_equality_fixed_seed(self, array_type): + rand_seed = 1000 + for num_samples in [None, 1, 10, 100]: # Test for different number of samples + input_array = array_type(self.input_array_list) + + CVTClass_A = CVTSampling( + input_array, + number_of_samples=num_samples, + sampling_type="creation", + rand_seed=rand_seed, + ) + unique_sample_points_A = CVTClass_A.sample_points() + + CVTClass_B = CVTSampling( + input_array, + number_of_samples=num_samples, + sampling_type="creation", + rand_seed=rand_seed, + ) + unique_sample_points_B = CVTClass_B.sample_points() + + np.testing.assert_array_equal( + unique_sample_points_A, unique_sample_points_B + ) + class TestCustomSampling: input_array = [[x, x + 10, (x + 1) ** 2 + x + 10] for x in range(10)] @@ -2631,6 +2817,46 @@ def test__init__selection_right_behaviour_with_bounds_option_false( assert CSClass.dist_vector == ["uniform", "normal"] assert CSClass.normal_bounds_enforced == False + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_right_behaviour_with_specified_random_seed( 
+ self, array_type + ): + input_array = array_type(self.input_array) + rand_seed = 1000 + CSClass = CustomSampling( + input_array, + number_of_samples=6, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], + rand_seed=rand_seed, + ) + np.testing.assert_array_equal(CSClass.data, input_array) + np.testing.assert_array_equal(CSClass.number_of_samples, 6) + np.testing.assert_array_equal(CSClass.x_data, np.array(input_array)[:, :-1]) + assert CSClass.dist_vector == ["uniform", "normal"] + assert CSClass.seed_value == rand_seed + + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_right_behaviour_with_specified_float_random_seed( + self, array_type + ): + input_array = array_type(self.input_array) + rand_seed = 1.2 + CSClass = CustomSampling( + input_array, + number_of_samples=6, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], + rand_seed=rand_seed, + ) + np.testing.assert_array_equal(CSClass.data, input_array) + np.testing.assert_array_equal(CSClass.number_of_samples, 6) + np.testing.assert_array_equal(CSClass.x_data, np.array(input_array)[:, :-1]) + assert CSClass.dist_vector == ["uniform", "normal"] + assert CSClass.seed_value == int(rand_seed) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) def test__init__selection_zero_samples(self, array_type): @@ -2788,6 +3014,19 @@ def test__init__selection_distribution_not_available(self, array_type): list_of_distributions=["uniform", "binomial"], ) + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [np.array, pd.DataFrame]) + def test__init__selection_non_integer_random_seed(self, array_type): + input_array = array_type(self.input_array) + with pytest.raises(ValueError, match="Random seed must be an integer."): + CSClass = CustomSampling( + input_array, + number_of_samples=5, + sampling_type="selection", + list_of_distributions=["uniform", "normal"], + 
rand_seed="1.2", + ) + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_right_hahaviour_with_none_samplingtype(self, array_type): @@ -2832,6 +3071,23 @@ def test__init__creation_right_behaviour_with_specified_no_samples( np.testing.assert_array_equal(CSClass.number_of_samples, 100) assert CSClass.dist_vector == ["uniform", "normal", "random"] + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test__init__creation_right_behaviour_with_specified_seed(self, array_type): + input_array = array_type(self.input_array_list) + rand_seed = 50 + CSClass = CustomSampling( + input_array, + number_of_samples=100, + sampling_type="creation", + list_of_distributions=["uniform", "normal", "random"], + rand_seed=rand_seed, + ) + np.testing.assert_array_equal(CSClass.data, input_array) + np.testing.assert_array_equal(CSClass.number_of_samples, 100) + assert CSClass.dist_vector == ["uniform", "normal", "random"] + assert CSClass.seed_value == rand_seed + @pytest.mark.unit @pytest.mark.parametrize("array_type", [list]) def test__init__creation_zero_samples(self, array_type): @@ -3311,6 +3567,34 @@ def test_sample_points_with_numpy_array_input_selection_mode(self, array_type): assert unique_sample_points.shape[1] == input_array.shape[1] assert type(unique_sample_points) == np.ndarray + @pytest.mark.unit + @pytest.mark.parametrize("array_type", [list]) + def test_sample_points_equality_fixed_seed(self, array_type): + rand_seed = 1000 + for num_samples in [None, 1, 10, 100]: # Test for different number of samples + input_array = array_type(self.input_array_list) + CSClass_A = CustomSampling( + input_array, + number_of_samples=num_samples, + sampling_type="creation", + list_of_distributions=["random", "normal", "uniform"], + rand_seed=rand_seed, + ) + unique_sample_points_A = CSClass_A.sample_points() + + CSClass_B = CustomSampling( + input_array, + number_of_samples=num_samples, + sampling_type="creation", + 
list_of_distributions=["random", "normal", "uniform"], + rand_seed=rand_seed, + ) + unique_sample_points_B = CSClass_B.sample_points() + + np.testing.assert_array_equal( + unique_sample_points_A, unique_sample_points_B + ) + if __name__ == "__main__": pytest.main()