From 3c38e783f4d82a657c33c721217ce0ff7cdee216 Mon Sep 17 00:00:00 2001
From: Marcus
Date: Mon, 16 Oct 2023 17:35:04 -0700
Subject: [PATCH] Shannon information gain removed and relative information entropy added

---
 docs/source/examples/SingleTaskTest.ipynb |  39 ++++++--
 examples/SingleTaskTest.ipynb             |  18 +---
 fvgp/gp.py                                | 114 ++++++++++++++++------
 tests/test_fvgp.py                        |   5 +-
 4 files changed, 124 insertions(+), 52 deletions(-)

diff --git a/docs/source/examples/SingleTaskTest.ipynb b/docs/source/examples/SingleTaskTest.ipynb
index 76c27b2..1c9ed7e 100644
--- a/docs/source/examples/SingleTaskTest.ipynb
+++ b/docs/source/examples/SingleTaskTest.ipynb
@@ -13,7 +13,7 @@
    "id": "3748355f",
    "metadata": {},
    "source": [
-    "This is the new test for fvgp version 4.0.0 and later."
+    "This is the new test for fvgp version 4.0.6 and later."
    ]
   },
   {
@@ -24,7 +24,7 @@
    "outputs": [],
    "source": [
     "##first install the newest version of fvgp\n",
-    "#!pip install fvgp==4.0.5"
+    "#!pip install fvgp==4.0.6"
    ]
   },
   {
@@ -82,7 +82,7 @@
    "def f1(x):\n",
    "    return np.sin(5. * x) + np.cos(10. * x) + (2.* (x-0.4)**2) * np.cos(100. * x)\n",
    "    \n",
-   "x_data = np.random.rand(100) \n",
+   "x_data = np.random.rand(20) \n",
    "y_data = f1(x_data) + (np.random.rand(len(x_data))-0.5) * 0.5\n",
    "\n",
    "plt.figure(figsize = (15,5))\n",
@@ -207,6 +207,20 @@
    "opt_obj = my_gp1.train_async(hps_bounds)"
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "7749c658",
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "import time\n",
+   "for i in range(10):\n",
+   "    time.sleep(2)\n",
+   "    my_gp1.update_hyperparameters(opt_obj)\n",
+   "    print(my_gp1.hyperparameters)\n",
+   "    print(\"\")"
+  ]
+ },
 {
  "cell_type": "code",
  "execution_count": null,
@@ -268,7 +282,7 @@
   },
   "outputs": [],
   "source": [
-   "sig3 = my_gp1.shannon_information_gain_vec(x_pred.reshape(-1,1))[\"sig(x)\"]"
+   "relative_entropy = my_gp1.gp_relative_information_entropy_set(x_pred.reshape(-1,1))[\"RIE\"]"
   ]
  },
 {
@@ -281,7 +295,7 @@
   "outputs": [],
   "source": [
    "plt.figure(figsize = (16,10))\n",
-   "plt.plot(x_pred,sig3, label = \"shannon\", linewidth = 4)\n",
+   "plt.plot(x_pred,relative_entropy, label = \"relative_entropy\", linewidth = 4)\n",
    "plt.scatter(x_data,y_data, color = 'black')\n",
    "plt.legend()"
   ]
  },
@@ -289,7 +303,20 @@
 {
  "cell_type": "code",
  "execution_count": null,
- "id": "9bee22b7",
+ "id": "ba8a08a8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "#We can ask for the mutual information and the total correlation of some given test data\n",
+  "x_test = np.array([[0.45],[0.45]])\n",
+  "print(\"MI: \",my_gp1.gp_mutual_information(x_test))\n",
+  "print(\"TC: \",my_gp1.gp_total_correlation(x_test))"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fc3be5d5",
  "metadata": {},
  "outputs": [],
  "source": []
diff --git a/examples/SingleTaskTest.ipynb b/examples/SingleTaskTest.ipynb
index cc5d1a5..1c9ed7e 100644
--- a/examples/SingleTaskTest.ipynb
+++ b/examples/SingleTaskTest.ipynb
@@ -13,7 +13,7 @@
    "id": "3748355f",
    "metadata": {},
    "source": [
-    "This is the new test for fvgp version 4.0.0 and later."
+    "This is the new test for fvgp version 4.0.6 and later."
   ]
  },
  {
@@ -24,7 +24,7 @@
    "outputs": [],
    "source": [
     "##first install the newest version of fvgp\n",
-    "#!pip install fvgp==4.0.5"
+    "#!pip install fvgp==4.0.6"
    ]
   },
   {
@@ -282,7 +282,7 @@
    },
    "outputs": [],
    "source": [
-    "sig3 = my_gp1.shannon_information_gain_vec(x_pred.reshape(-1,1))[\"sig(x)\"]"
+    "relative_entropy = my_gp1.gp_relative_information_entropy_set(x_pred.reshape(-1,1))[\"RIE\"]"
    ]
   },
   {
@@ -295,21 +295,11 @@
    "outputs": [],
    "source": [
     "plt.figure(figsize = (16,10))\n",
-    "plt.plot(x_pred,sig3, label = \"shannon\", linewidth = 4)\n",
+    "plt.plot(x_pred,relative_entropy, label = \"relative_entropy\", linewidth = 4)\n",
     "plt.scatter(x_data,y_data, color = 'black')\n",
     "plt.legend()"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9bee22b7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sig3 = my_gp1.shannon_information_gain(x_pred.reshape(-1,1))[\"sig\"]\n"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/fvgp/gp.py b/fvgp/gp.py
index ff079a5..2e21e39 100755
--- a/fvgp/gp.py
+++ b/fvgp/gp.py
@@ -1779,16 +1779,11 @@ def gp_total_correlation(self,x_pred, x_out = None):
         return {"x":x_pred,
                 "total correlation":self.kl_div(np.zeros((len(joint_covariance))),np.zeros((len(joint_covariance))),joint_covariance,prod_covariance)}
     ###########################################################################
-    def shannon_information_gain(self, x_pred, x_out = None):
+    def gp_relative_information_entropy(self, x_pred, x_out = None):
         """
-        Function to compute the shannon-information --- a well-behaved function
-        of the predicted drop in entropy --- given
-        a set of points. The shannon_information gain is a scalar, it is proportionate to
-        the mutual infomation of the two random variables f(x_pred) and f(x_data).
-        The mutual information is always positive, as it is a KL divergence, and is bounded
-        from below by 0. The maxima are expected at the data points. Zero is expected far from the
-        data support. This shannon information gain is exp(-total correlation).
-        Parameters
+        Function to compute the KL divergence, and therefore the relative information entropy,
+        between the prior distribution over the predicted function values and the posterior distribution.
+        Parameters
         ----------
         x_pred : np.ndarray
             A numpy array of shape (V x D), interpreted as an array of input point positions.
@@ -1806,40 +1801,101 @@
         x_out : np.ndarray, optional
            Output coordinates in case of multi-task GP use; a numpy array of size (N x L), where N is the number of output points,
            and L is the dimensionality of the output space.

         Return
         -------
         solution dictionary : {}
             Information gain of collective points.
         """
         if isinstance(x_pred,np.ndarray):
             if np.ndim(x_pred) == 1: raise Exception("x_pred has to be a 2d numpy array, not 1d")
             if x_out is not None: x_pred = self._cartesian_product_euclid(x_pred,x_out)
             if len(x_pred[0]) != self.input_space_dim: raise Exception("Wrong dimensionality of the input points x_pred.")
         elif x_out is not None: raise Exception("Multi-task GPs on non-Euclidean spaces not implemented yet.")
-
-
-        return {"x": x_pred,
-                "sig":np.exp(-self.gp_total_correlation(x_pred, x_out = None)["total correlation"])}
-
+        kk = self.kernel(x_pred, x_pred,self.hyperparameters,self) + (np.identity(len(x_pred)) * 1e-9)
+        post_cov = self.posterior_covariance(x_pred, x_out = None)["S"] + (np.identity(len(x_pred)) * 1e-9)
+        return {"x":x_pred,
+                "RIE":self.kl_div(np.zeros((len(x_pred))),np.zeros((len(x_pred))),kk,post_cov)}
     ###########################################################################
-    def shannon_information_gain_vec(self, x_pred, x_out = None):
+    def gp_relative_information_entropy_set(self, x_pred, x_out = None):
         """
-        Function to compute the shannon-information gain of a set of points,
-        but per point, in comparison to fvgp.GP.shannon_information_gain().
-        In this case, the information_gain is a vector. 
-        Parameters
+        Function to compute the KL divergence, and therefore the relative information entropy,
+        between the prior distribution over the predicted function values and the posterior distribution,
+        evaluated separately for each point in the set.
+        Parameters
         ----------
-        x_pred: 1d or 2d numpy array of points, note, these are elements of the
-                index set which results from a cartesian product of input and output space
+        x_pred : np.ndarray
+            A numpy array of shape (V x D), interpreted as an array of input point positions.
+        x_out : np.ndarray, optional
+            Output coordinates in case of multi-task GP use; a numpy array of size (N x L), where N is the number of output points,
+            and L is the dimensionality of the output space.
+
         Return
         -------
         solution dictionary : {}
-            Informatino gain per point.
+            Relative information entropy per point.
         """
         if isinstance(x_pred,np.ndarray):
             if np.ndim(x_pred) == 1: raise Exception("x_pred has to be a 2d numpy array, not 1d")
             if x_out is not None: x_pred = self._cartesian_product_euclid(x_pred,x_out)
             if len(x_pred[0]) != self.input_space_dim: raise Exception("Wrong dimensionality of the input points x_pred.")
         elif x_out is not None: raise Exception("Multi-task GPs on non-Euclidean spaces not implemented yet.")
-
-
-
-        sig = np.zeros((len(x_pred)))
+        RIE = np.zeros((len(x_pred)))
         for i in range(len(x_pred)):
-            sig[i] = np.exp(-self.gp_mutual_information(x_pred[i].reshape(1,len(x_pred[i])), x_out = None)["mutual information"])
+            RIE[i] = self.gp_relative_information_entropy(x_pred[i].reshape(1,len(x_pred[i])), x_out = None)["RIE"]

-        return {"x": x_pred,
-                "sig(x)":sig}
+        return {"x":x_pred,
+                "RIE":RIE}
     ###########################################################################
     def posterior_probability(self, x_pred, comp_mean, comp_cov, x_out = None):
diff --git a/tests/test_fvgp.py b/tests/test_fvgp.py
index f49c234..4a59150 100755
--- a/tests/test_fvgp.py
+++ b/tests/test_fvgp.py
@@ -51,7 +51,6 @@ def test_single_task_init_basic():
     res = my_gp1.posterior_covariance(x_pred)
     res = my_gp1.posterior_covariance_grad(x_pred,direction=0)
     res = my_gp1.gp_entropy(x_pred)
-    res = my_gp1.shannon_information_gain(x_pred)
     res = my_gp1.squared_exponential_kernel(1,1)
     res = my_gp1.squared_exponential_kernel_robust(1,1)
     res = my_gp1.exponential_kernel(1,1)
@@ -87,14 +86,14 @@ def test_train_basic():
     res = my_gp1.joint_gp_prior_grad(np.random.rand(10,len(x_data[0])),0)
     res = my_gp1.gp_entropy(np.random.rand(10,len(x_data[0])))
     res = my_gp1.gp_entropy_grad(np.random.rand(10,len(x_data[0])),0)
+    res = my_gp1.gp_relative_information_entropy(np.random.rand(10,len(x_data[0])))
+    res = my_gp1.gp_relative_information_entropy_set(np.random.rand(10,len(x_data[0])))
     A = np.random.rand(10,10)
     B = A.T @ A
     res = my_gp1.entropy(B)
     res = my_gp1.gp_kl_div(np.random.rand(10,len(x_data[0])), np.random.rand(10), B)
     res = my_gp1.gp_kl_div_grad(np.random.rand(10,len(x_data[0])), np.random.rand(10), B,0)
     res = my_gp1.posterior_probability(np.random.rand(10,len(x_data[0])), np.random.rand(10), B)
     res = my_gp1.posterior_probability_grad(np.random.rand(10,len(x_data[0])), np.random.rand(10), B, direction = 0)
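--
Reviewer note, not part of the patch: a minimal usage sketch of the renamed API. Only gp_relative_information_entropy, gp_relative_information_entropy_set, and their "RIE" return key are taken from the diff above; the constructor call, the hyperparameter bounds, and the toy data are assumptions modeled on the single-task example notebook and may need adjusting for other fvgp versions.

    import numpy as np
    from fvgp import GP

    # Toy data set: 20 one-dimensional input points (illustrative only).
    x_data = np.random.rand(20).reshape(-1, 1)
    y_data = np.sin(5. * x_data[:, 0])

    # Constructor and training bounds follow the notebook pattern; the
    # two-hyperparameter default kernel is an assumption.
    my_gp1 = GP(1, x_data, y_data, init_hyperparameters=np.ones((2)))
    my_gp1.train(np.array([[0.01, 10.], [0.01, 10.]]))

    x_pred = np.linspace(0., 1., 50).reshape(-1, 1)

    # One scalar: the KL divergence between the zero-mean prior and the
    # posterior over f(x_pred), for the whole candidate set jointly ...
    rie_joint = my_gp1.gp_relative_information_entropy(x_pred)["RIE"]

    # ... versus the same quantity evaluated point by point (a vector).
    rie_per_point = my_gp1.gp_relative_information_entropy_set(x_pred)["RIE"]

Both quantities are KL divergences and therefore non-negative; large values flag prediction points where the data are most informative, which is what the updated notebook plots over x_pred.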