From 3c38e783f4d82a657c33c721217ce0ff7cdee216 Mon Sep 17 00:00:00 2001
From: Marcus
Date: Mon, 16 Oct 2023 17:35:04 -0700
Subject: [PATCH] Shannon information gain removed and relative information entropy added

---
 docs/source/examples/SingleTaskTest.ipynb |  39 ++++++--
 examples/SingleTaskTest.ipynb             |  18 +---
 fvgp/gp.py                                | 114 ++++++++++++++++------
 tests/test_fvgp.py                        |   5 +-
 4 files changed, 124 insertions(+), 52 deletions(-)

diff --git a/docs/source/examples/SingleTaskTest.ipynb b/docs/source/examples/SingleTaskTest.ipynb
index 76c27b2..1c9ed7e 100644
--- a/docs/source/examples/SingleTaskTest.ipynb
+++ b/docs/source/examples/SingleTaskTest.ipynb
@@ -13,7 +13,7 @@
    "id": "3748355f",
    "metadata": {},
    "source": [
-    "This is the new test for fvgp version 4.0.0 and later."
+    "This is the new test for fvgp version 4.0.6 and later."
    ]
   },
   {
@@ -24,7 +24,7 @@
    "outputs": [],
    "source": [
     "##first install the newest version of fvgp\n",
-    "#!pip install fvgp==4.0.5"
+    "#!pip install fvgp==4.0.6"
    ]
   },
   {
@@ -82,7 +82,7 @@
    "def f1(x):\n",
    "    return np.sin(5. * x) + np.cos(10. * x) + (2.* (x-0.4)**2) * np.cos(100. * x)\n",
    "    \n",
-   "x_data = np.random.rand(100) \n",
+   "x_data = np.random.rand(20) \n",
    "y_data = f1(x_data) + (np.random.rand(len(x_data))-0.5) * 0.5\n",
    "\n",
    "plt.figure(figsize = (15,5))\n",
@@ -207,6 +207,20 @@
    "opt_obj = my_gp1.train_async(hps_bounds)"
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "7749c658",
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "import time\n",
+   "for i in range(10):\n",
+   "    time.sleep(2)\n",
+   "    my_gp1.update_hyperparameters(opt_obj)\n",
+   "    print(my_gp1.hyperparameters)\n",
+   "    print(\"\")"
+  ]
+ },
 {
  "cell_type": "code",
  "execution_count": null,
@@ -268,7 +282,7 @@
   },
   "outputs": [],
   "source": [
-   "sig3 = my_gp1.shannon_information_gain_vec(x_pred.reshape(-1,1))[\"sig(x)\"]"
+   "relative_entropy = my_gp1.gp_relative_information_entropy_set(x_pred.reshape(-1,1))[\"RIE\"]"
   ]
  },
 {
@@ -281,7 +295,7 @@
   "outputs": [],
   "source": [
    "plt.figure(figsize = (16,10))\n",
-   "plt.plot(x_pred,sig3, label = \"shannon\", linewidth = 4)\n",
+   "plt.plot(x_pred,relative_entropy, label = \"relative_entropy\", linewidth = 4)\n",
    "plt.scatter(x_data,y_data, color = 'black')\n",
    "plt.legend()"
   ]
  },
@@ -289,7 +303,20 @@
 {
  "cell_type": "code",
  "execution_count": null,
- "id": "9bee22b7",
+ "id": "ba8a08a8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "#We can ask for the mutual information and the total correlation of some given test data\n",
+  "x_test = np.array([[0.45],[0.45]])\n",
+  "print(\"MI: \",my_gp1.gp_mutual_information(x_test))\n",
+  "print(\"TC: \",my_gp1.gp_total_correlation(x_test))"
+ ]
+},
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fc3be5d5",
  "metadata": {},
  "outputs": [],
  "source": []
diff --git a/examples/SingleTaskTest.ipynb b/examples/SingleTaskTest.ipynb
index cc5d1a5..1c9ed7e 100644
--- a/examples/SingleTaskTest.ipynb
+++ b/examples/SingleTaskTest.ipynb
@@ -13,7 +13,7 @@
    "id": "3748355f",
    "metadata": {},
    "source": [
-    "This is the new test for fvgp version 4.0.0 and later."
+    "This is the new test for fvgp version 4.0.6 and later."
   ]
  },
  {
@@ -24,7 +24,7 @@
    "outputs": [],
    "source": [
     "##first install the newest version of fvgp\n",
-    "#!pip install fvgp==4.0.5"
+    "#!pip install fvgp==4.0.6"
    ]
   },
   {
@@ -282,7 +282,7 @@
    },
    "outputs": [],
    "source": [
-    "sig3 = my_gp1.shannon_information_gain_vec(x_pred.reshape(-1,1))[\"sig(x)\"]"
+    "relative_entropy = my_gp1.gp_relative_information_entropy_set(x_pred.reshape(-1,1))[\"RIE\"]"
    ]
   },
   {
@@ -295,21 +295,11 @@
    "outputs": [],
    "source": [
     "plt.figure(figsize = (16,10))\n",
-    "plt.plot(x_pred,sig3, label = \"shannon\", linewidth = 4)\n",
+    "plt.plot(x_pred,relative_entropy, label = \"relative_entropy\", linewidth = 4)\n",
     "plt.scatter(x_data,y_data, color = 'black')\n",
     "plt.legend()"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9bee22b7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sig3 = my_gp1.shannon_information_gain(x_pred.reshape(-1,1))[\"sig\"]\n"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/fvgp/gp.py b/fvgp/gp.py
index ff079a5..2e21e39 100755
--- a/fvgp/gp.py
+++ b/fvgp/gp.py
@@ -1779,16 +1779,11 @@ def gp_total_correlation(self,x_pred, x_out = None):
         return {"x":x_pred,
                 "total correlation":self.kl_div(np.zeros((len(joint_covariance))),np.zeros((len(joint_covariance))),joint_covariance,prod_covariance)}
     ###########################################################################
-    def shannon_information_gain(self, x_pred, x_out = None):
+    def gp_relative_information_entropy(self, x_pred, x_out = None):
         """
-        Function to compute the shannon-information --- a well-behaved function
-        of the predicted drop in entropy --- given
-        a set of points. The shannon_information gain is a scalar, it is proportionate to
-        the mutual infomation of the two random variables f(x_pred) and f(x_data).
-        The mutual information is always positive, as it is a KL divergence, and is bounded
-        from below by 0. The maxima are expected at the data points. Zero is expected far from the
-        data support. This shannon information gain is exp(-total correlation).
-        Parameters
+        Function to compute the KL divergence, and therefore the relative information entropy,
+        between the prior distribution over the predicted function values and the posterior distribution.
+        Parameters
         ----------
         x_pred : np.ndarray
             A numpy array of shape (V x D), interpreted as an array of input point positions.
@@ -1806,40 +1801,101 @@
         x_out : np.ndarray, optional
            Output coordinates in case of multi-task GP use; a numpy array of size (N x L), where N is the number of output points,
            and L is the dimensionality of the output space.

         Return
         -------
         solution dictionary : {}
             Information gain of collective points.
         """
         if isinstance(x_pred,np.ndarray):
             if np.ndim(x_pred) == 1: raise Exception("x_pred has to be a 2d numpy array, not 1d")
             if x_out is not None: x_pred = self._cartesian_product_euclid(x_pred,x_out)
             if len(x_pred[0]) != self.input_space_dim: raise Exception("Wrong dimensionality of the input points x_pred.")
         elif x_out is not None: raise Exception("Multi-task GPs on non-Euclidean spaces not implemented yet.")
-
-
-        return {"x": x_pred,
-                "sig":np.exp(-self.gp_total_correlation(x_pred, x_out = None)["total correlation"])}
-
+        kk = self.kernel(x_pred, x_pred,self.hyperparameters,self) + (np.identity(len(x_pred)) * 1e-9)
+        post_cov = self.posterior_covariance(x_pred, x_out = None)["S"] + (np.identity(len(x_pred)) * 1e-9)
+        return {"x":x_pred,
+                "RIE":self.kl_div(np.zeros((len(x_pred))),np.zeros((len(x_pred))),kk,post_cov)}
     ###########################################################################
-    def shannon_information_gain_vec(self, x_pred, x_out = None):
+    def gp_relative_information_entropy_set(self, x_pred, x_out = None):
         """
-        Function to compute the shannon-information gain of a set of points,
-        but per point, in comparison to fvgp.GP.shannon_information_gain().
-        In this case, the information_gain is a vector. 
-        Parameters
+        Function to compute the KL divergence, and therefore the relative information entropy,
+        between the prior distribution over the predicted function values and the posterior distribution,
+        evaluated separately for each point in the set.
+        Parameters
         ----------
-        x_pred: 1d or 2d numpy array of points, note, these are elements of the
-                index set which results from a cartesian product of input and output space
+        x_pred : np.ndarray
+            A numpy array of shape (V x D), interpreted as an array of input point positions.
+        x_out : np.ndarray, optional
+            Output coordinates in case of multi-task GP use; a numpy array of size (N x L), where N is the number of output points,
+            and L is the dimensionality of the output space.
+
         Return
         -------
         solution dictionary : {}
-            Informatino gain per point.
+            Relative information entropy per point.
         """
         if isinstance(x_pred,np.ndarray):
             if np.ndim(x_pred) == 1: raise Exception("x_pred has to be a 2d numpy array, not 1d")
             if x_out is not None: x_pred = self._cartesian_product_euclid(x_pred,x_out)
             if len(x_pred[0]) != self.input_space_dim: raise Exception("Wrong dimensionality of the input points x_pred.")
         elif x_out is not None: raise Exception("Multi-task GPs on non-Euclidean spaces not implemented yet.")
-
-
-
-        sig = np.zeros((len(x_pred)))
+        RIE = np.zeros((len(x_pred)))
         for i in range(len(x_pred)):
-            sig[i] = np.exp(-self.gp_mutual_information(x_pred[i].reshape(1,len(x_pred[i])), x_out = None)["mutual information"])
+            RIE[i] = self.gp_relative_information_entropy(x_pred[i].reshape(1,len(x_pred[i])), x_out = None)["RIE"]

-        return {"x": x_pred,
-                "sig(x)":sig}
+        return {"x":x_pred,
+                "RIE":RIE}
     ###########################################################################
     def posterior_probability(self, x_pred, comp_mean, comp_cov, x_out = None):
diff --git a/tests/test_fvgp.py b/tests/test_fvgp.py
index f49c234..4a59150 100755
--- a/tests/test_fvgp.py
+++ b/tests/test_fvgp.py
@@ -51,7 +51,6 @@ def test_single_task_init_basic():
     res = my_gp1.posterior_covariance(x_pred)
     res = my_gp1.posterior_covariance_grad(x_pred,direction=0)
     res = my_gp1.gp_entropy(x_pred)
-    res = my_gp1.shannon_information_gain(x_pred)
     res = my_gp1.squared_exponential_kernel(1,1)
     res = my_gp1.squared_exponential_kernel_robust(1,1)
     res = my_gp1.exponential_kernel(1,1)
@@ -87,14 +86,14 @@ def test_train_basic():
     res = my_gp1.joint_gp_prior_grad(np.random.rand(10,len(x_data[0])),0)
     res = my_gp1.gp_entropy(np.random.rand(10,len(x_data[0])))
     res = my_gp1.gp_entropy_grad(np.random.rand(10,len(x_data[0])),0)
+    res = my_gp1.gp_relative_information_entropy(np.random.rand(10,len(x_data[0])))
+    res = my_gp1.gp_relative_information_entropy_set(np.random.rand(10,len(x_data[0])))
     A = np.random.rand(10,10)
     B = A.T @ A
     res = my_gp1.entropy(B)
     res = my_gp1.gp_kl_div(np.random.rand(10,len(x_data[0])), np.random.rand(10), B)
     res = my_gp1.gp_kl_div_grad(np.random.rand(10,len(x_data[0])), np.random.rand(10), B,0)
     res = my_gp1.posterior_probability(np.random.rand(10,len(x_data[0])), np.random.rand(10), B)
     res = my_gp1.posterior_probability_grad(np.random.rand(10,len(x_data[0])), np.random.rand(10), B, direction = 0)
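--
Reviewer note, not part of the patch: a minimal usage sketch of the renamed API. Only gp_relative_information_entropy, gp_relative_information_entropy_set, and their "RIE" return key are taken from the diff above; the constructor call, the hyperparameter bounds, and the toy data are assumptions modeled on the single-task example notebook and may need adjusting for other fvgp versions.

    import numpy as np
    from fvgp import GP

    # Toy data set: 20 one-dimensional input points (illustrative only).
    x_data = np.random.rand(20).reshape(-1, 1)
    y_data = np.sin(5. * x_data[:, 0])

    # Constructor and training bounds follow the notebook pattern; the
    # two-hyperparameter default kernel is an assumption.
    my_gp1 = GP(1, x_data, y_data, init_hyperparameters=np.ones((2)))
    my_gp1.train(np.array([[0.01, 10.], [0.01, 10.]]))

    x_pred = np.linspace(0., 1., 50).reshape(-1, 1)

    # One scalar: the KL divergence between the zero-mean prior and the
    # posterior over f(x_pred), for the whole candidate set jointly ...
    rie_joint = my_gp1.gp_relative_information_entropy(x_pred)["RIE"]

    # ... versus the same quantity evaluated point by point (a vector).
    rie_per_point = my_gp1.gp_relative_information_entropy_set(x_pred)["RIE"]

Both quantities are KL divergences and therefore non-negative; large values flag prediction points where the data are most informative, which is what the updated notebook plots over x_pred.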